clang 20.0.0git
CGBuiltin.cpp
Go to the documentation of this file.
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <numeric>
72#include <optional>
73#include <utility>
74
75using namespace clang;
76using namespace CodeGen;
77using namespace llvm;
78
79static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
80 Align AlignmentInBytes) {
81 ConstantInt *Byte;
82 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
83 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
84 // Nothing to initialize.
85 return;
86 case LangOptions::TrivialAutoVarInitKind::Zero:
87 Byte = CGF.Builder.getInt8(0x00);
88 break;
89 case LangOptions::TrivialAutoVarInitKind::Pattern: {
90 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
91 Byte = llvm::dyn_cast<llvm::ConstantInt>(
92 initializationPatternFor(CGF.CGM, Int8));
93 break;
94 }
95 }
96 if (CGF.CGM.stopAutoInit())
97 return;
98 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
99 I->addAnnotationMetadata("auto-init");
100}
101
// NOTE(review): the line carrying this function's signature is missing from
// this listing; only the body is visible.
//
// Compares the first call argument against floating-point zero (ordered
// less-than) and emits a target-specific discard:
//  - vector arguments are reduced to a single i1 with the HLSL runtime's
//    "any" intrinsic;
//  - on DXIL the comparison is passed to the dx_discard intrinsic;
//  - on SPIR-V a conditional branch to a block containing spv_discard is
//    emitted, and code generation continues in the "end" block.
// Returns the last instruction emitted (the dx_discard call or the branch to
// "end"). Any other target is reported as unreachable.
103 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
104
105 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
106 Value *CMP;
107 Value *LastInstr;
108
109 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
110 FZeroConst = ConstantVector::getSplat(
111 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
112 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
113 CMP = CGF->Builder.CreateIntrinsic(
114 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
115 {FCompInst}, nullptr);
116 } else
117 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
118
119 if (CGF->CGM.getTarget().getTriple().isDXIL())
120 LastInstr = CGF->Builder.CreateIntrinsic(
121 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
122 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
123 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
124 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
125
126 CGF->Builder.CreateCondBr(CMP, LT0, End);
127
128 CGF->Builder.SetInsertPoint(LT0);
129
130 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
131 nullptr);
132
133 LastInstr = CGF->Builder.CreateBr(End);
134
135 CGF->Builder.SetInsertPoint(End);
136 } else {
137 llvm_unreachable("Backend Codegen not supported.");
138 }
139
140 return LastInstr;
141}
142
// NOTE(review): the signature line (and one line after the second
// EmitHLSLOutArgExpr call) is missing from this listing.
//
// Splits the first call argument into a "low" and a "high" 32-bit part per
// element and stores them through the temporaries created for the two HLSL
// out arguments (call arguments 1 and 2), then emits the writebacks.
//  - DXIL: calls the dx_splitdouble intrinsic, which returns a two-member
//    struct, and extracts both members.
//  - Other targets: bitcasts the input to a vector of i32 and picks the
//    even-indexed lanes as the low bits and the odd-indexed lanes as the
//    high bits.
// Returns the store of the high bits.
//
// NOTE(review): OutArg1/OutArg2 come from dyn_cast<> and are dereferenced
// without a null check; cast<> would assert instead - confirm that callers
// always pass HLSLOutArgExpr here.
144 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
145 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
146 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
147
148 CallArgList Args;
149 LValue Op1TmpLValue =
150 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
151 LValue Op2TmpLValue =
152 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
153
155 Args.reverseWritebacks();
156
157 Value *LowBits = nullptr;
158 Value *HighBits = nullptr;
159
160 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
161
162 llvm::Type *RetElementTy = CGF->Int32Ty;
163 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
164 RetElementTy = llvm::VectorType::get(
165 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
166 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
167
168 CallInst *CI = CGF->Builder.CreateIntrinsic(
169 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
170
171 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
172 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
173
174 } else {
175 // For Non DXIL targets we generate the instructions.
176
177 if (!Op0->getType()->isVectorTy()) {
178 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
179 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
180
181 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
182 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
183 } else {
184 int NumElements = 1;
185 if (const auto *VecTy =
186 E->getArg(0)->getType()->getAs<clang::VectorType>())
187 NumElements = VecTy->getNumElements();
188
189 FixedVectorType *Uint32VecTy =
190 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
191 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
192 if (NumElements == 1) {
193 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
194 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
195 } else {
// Even lanes hold the low halves, odd lanes the high halves.
196 SmallVector<int> EvenMask, OddMask;
197 for (int I = 0, E = NumElements; I != E; ++I) {
198 EvenMask.push_back(I * 2);
199 OddMask.push_back(I * 2 + 1);
200 }
201 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
202 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
203 }
204 }
205 }
206 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
207 auto *LastInst =
208 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
209 CGF->EmitWritebacks(Args);
210 return LastInst;
211}
212
// NOTE(review): the signature line and the declaration of `Mask` (used in the
// loop below) are missing from this listing.
//
// Builds a double (or a vector of N doubles when the first argument is a
// vector) from two unsigned-integer operands holding the low and high bits.
//  - DXIL: calls the dx_asdouble intrinsic with both operands.
//  - Other targets: scalars are first splatted to one-element vectors, then
//    the low and high vectors are interleaved with a shufflevector
//    (low[i], high[i], ...) and the result is bitcast to the result type.
214 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
215 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
216 "asdouble operands types mismatch");
217 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
218 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
219
220 llvm::Type *ResultType = CGF.DoubleTy;
221 int N = 1;
222 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
223 N = VTy->getNumElements();
224 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
225 }
226
227 if (CGF.CGM.getTarget().getTriple().isDXIL())
228 return CGF.Builder.CreateIntrinsic(
229 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
230 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
231
232 if (!E->getArg(0)->getType()->isVectorType()) {
233 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
234 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
235 }
236
// Index i selects lane i of OpLowBits, index i + N selects lane i of
// OpHighBits (the second shuffle operand).
238 for (int i = 0; i < N; i++) {
239 Mask.push_back(i);
240 Mask.push_back(i + N);
241 }
242
243 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
244
245 return CGF.Builder.CreateBitCast(BitVec, ResultType);
246}
247
248/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
249/// return it as an i8 pointer.
// NOTE(review): the signature line is missing from this listing.
// The register is named through a metadata node holding the string "x18",
// read as a 64-bit integer via llvm.read_register, and converted to a pointer
// with inttoptr.
251 LLVMContext &Context = CGF.CGM.getLLVMContext();
252 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
253 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
254 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
255 llvm::Function *F =
256 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
257 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
258 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
259}
260
261/// getBuiltinLibFunction - Given a builtin id for a function like
262/// "__builtin_fabsf", return a Function* for "fabsf".
// NOTE(review): the first line of the signature is missing from this listing.
//
// Name selection, in priority order:
//  1. a declaration with an asm label uses its mangled name;
//  2. PPC64 with an IEEE-quad long double maps the builtins listed in
//     F128Builtins to their "...ieee128" library names;
//  3. AIX with a 64-bit (IEEE double) long double maps frexpl/ldexpl/modfl to
//     the plain double versions;
//  4. otherwise the builtin name with its first 10 characters removed.
// The function type is taken from the declaration's converted type.
264 unsigned BuiltinID) {
265 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
266
267 // Get the name, skip over the __builtin_ prefix (if necessary).
268 StringRef Name;
269 GlobalDecl D(FD);
270
271 // TODO: This list should be expanded or refactored after all GCC-compatible
272 // std libcall builtins are implemented.
273 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
274 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
275 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
276 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
277 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
278 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
279 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
280 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
281 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
282 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
283 {Builtin::BI__builtin_printf, "__printfieee128"},
284 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
285 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
286 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
287 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
288 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
289 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
290 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
291 {Builtin::BI__builtin_scanf, "__scanfieee128"},
292 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
293 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
294 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
295 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
296 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
297 };
298
299 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
300 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
301 // if it is 64-bit 'long double' mode.
302 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
303 {Builtin::BI__builtin_frexpl, "frexp"},
304 {Builtin::BI__builtin_ldexpl, "ldexp"},
305 {Builtin::BI__builtin_modfl, "modf"},
306 };
307
308 // If the builtin has been declared explicitly with an assembler label,
309 // use the mangled name. This differs from the plain label on platforms
310 // that prefix labels.
311 if (FD->hasAttr<AsmLabelAttr>())
312 Name = getMangledName(D);
313 else {
314 // TODO: This mutation should also be applied to targets other than
315 // PPC, after backend supports IEEE 128-bit style libcalls.
// NOTE(review): contains() followed by operator[] looks the key up twice; a
// single find() would do the same work once.
316 if (getTriple().isPPC64() &&
317 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
318 F128Builtins.contains(BuiltinID))
319 Name = F128Builtins[BuiltinID];
320 else if (getTriple().isOSAIX() &&
321 &getTarget().getLongDoubleFormat() ==
322 &llvm::APFloat::IEEEdouble() &&
323 AIXLongDouble64Builtins.contains(BuiltinID))
324 Name = AIXLongDouble64Builtins[BuiltinID];
325 else
// 10 is the length of the "__builtin_" prefix being skipped.
326 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
327 }
328
329 llvm::FunctionType *Ty =
330 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
331
332 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
333}
334
335/// Emit the conversions required to turn the given value into an
336/// integer of the given size.
337static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
338 QualType T, llvm::IntegerType *IntType) {
339 V = CGF.EmitToMemory(V, T);
340
341 if (V->getType()->isPointerTy())
342 return CGF.Builder.CreatePtrToInt(V, IntType);
343
344 assert(V->getType() == IntType);
345 return V;
346}
347
348static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
349 QualType T, llvm::Type *ResultType) {
350 V = CGF.EmitFromMemory(V, T);
351
352 if (ResultType->isPointerTy())
353 return CGF.Builder.CreateIntToPtr(V, ResultType);
354
355 assert(V->getType() == ResultType);
356 return V;
357}
358
// NOTE(review): the signature line and the first arm of the conditional
// expression that initializes `Bytes` are missing from this listing.
//
// Evaluates the first call argument as an address and checks that its known
// alignment is a multiple of the access size in bytes. If it is not, a
// warn_sync_op_misaligned diagnostic is reported at the call and the address
// is returned with its alignment overridden to the access size; otherwise the
// address is returned unchanged.
360 ASTContext &Ctx = CGF.getContext();
361 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
362 unsigned Bytes = Ptr.getElementType()->isPointerTy()
364 : Ptr.getElementType()->getScalarSizeInBits() / 8;
365 unsigned Align = Ptr.getAlignment().getQuantity();
366 if (Align % Bytes != 0) {
367 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
368 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
369 // Force address to be at least naturally-aligned.
370 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
371 }
372 return Ptr;
373}
374
375/// Utility to insert an atomic instruction based on Intrinsic::ID
376/// and the expression node.
// NOTE(review): the first signature line and the opening line of the second
// assert are missing from this listing.
//
// Emits `atomicrmw Kind` on the address given by call argument 0 with call
// argument 1 as the operand, using `Ordering` (sequentially consistent by
// default). The operand is converted to an integer of the result type's bit
// width before the operation and the result is converted back to the
// operand's original LLVM type.
378 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
379 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
380
381 QualType T = E->getType();
382 assert(E->getArg(0)->getType()->isPointerType());
384 E->getArg(0)->getType()->getPointeeType()));
385 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
386
387 Address DestAddr = CheckAtomicAlignment(CGF, E);
388
389 llvm::IntegerType *IntType = llvm::IntegerType::get(
390 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
391
392 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
393 llvm::Type *ValueType = Val->getType();
394 Val = EmitToInt(CGF, Val, T, IntType);
395
396 llvm::Value *Result =
397 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
398 return EmitFromInt(CGF, Result, T, ValueType);
399}
400
// NOTE(review): the signature line is missing from this listing.
//
// Stores call argument 0 to the address given by call argument 1 through an
// lvalue flagged as nontemporal. Always returns nullptr.
402 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
403 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
404
405 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
406 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
407 LV.setNontemporal(true);
408 CGF.EmitStoreOfScalar(Val, LV, false);
409 return nullptr;
410}
411
// NOTE(review): the signature line is missing from this listing.
//
// Loads a value of the call's result type from the address given by call
// argument 0 through an lvalue flagged as nontemporal, and returns it.
413 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
414
415 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
416 LV.setNontemporal(true);
417 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
418}
419
// NOTE(review): the first signature line is missing from this listing.
//
// Wraps the value produced by MakeBinaryAtomicValue (called with its default
// ordering) in an RValue.
421 llvm::AtomicRMWInst::BinOp Kind,
422 const CallExpr *E) {
423 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
424}
425
426/// Utility to insert an atomic instruction based on Intrinsic::ID and
427/// the expression node, where the return value is the result of the
428/// operation.
// NOTE(review): the first signature line and the opening line of the second
// assert are missing from this listing.
//
// Emits a sequentially consistent `atomicrmw Kind`, then re-applies `Op` to
// the atomicrmw result and the operand so that the returned value reflects
// the operation's outcome. When `Invert` is set the recomputed value is
// additionally XORed with all-ones (bitwise NOT).
430 llvm::AtomicRMWInst::BinOp Kind,
431 const CallExpr *E,
432 Instruction::BinaryOps Op,
433 bool Invert = false) {
434 QualType T = E->getType();
435 assert(E->getArg(0)->getType()->isPointerType());
437 E->getArg(0)->getType()->getPointeeType()));
438 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
439
440 Address DestAddr = CheckAtomicAlignment(CGF, E);
441
442 llvm::IntegerType *IntType = llvm::IntegerType::get(
443 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
444
445 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
446 llvm::Type *ValueType = Val->getType();
447 Val = EmitToInt(CGF, Val, T, IntType);
448
449 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
450 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
451 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
452 if (Invert)
453 Result =
454 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
455 llvm::ConstantInt::getAllOnesValue(IntType));
456 Result = EmitFromInt(CGF, Result, T, ValueType);
457 return RValue::get(Result);
458}
459
460/// Utility to insert an atomic cmpxchg instruction.
461///
462/// @param CGF The current codegen function.
463/// @param E Builtin call expression to convert to cmpxchg.
464/// arg0 - address to operate on
465/// arg1 - value to compare with
466/// arg2 - new value
467/// @param ReturnBool Specifies whether to return success flag of
468/// cmpxchg result or the old value.
469///
470/// @returns result of cmpxchg, according to ReturnBool
471///
472/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
473/// invoke the function EmitAtomicCmpXchgForMSIntrin.
// NOTE(review): the first signature line and the line that declares `Pair`
// (the start of the cmpxchg creation call) are missing from this listing.
//
// Both success and failure orderings are sequentially consistent. The compare
// and new values are converted to an integer of the operand type's bit width.
475 bool ReturnBool) {
476 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
477 Address DestAddr = CheckAtomicAlignment(CGF, E);
478
479 llvm::IntegerType *IntType = llvm::IntegerType::get(
480 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
481
482 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
483 llvm::Type *ValueType = Cmp->getType();
484 Cmp = EmitToInt(CGF, Cmp, T, IntType);
485 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
486
488 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
489 llvm::AtomicOrdering::SequentiallyConsistent);
490 if (ReturnBool)
491 // Extract boolean success flag and zext it to int.
492 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
493 CGF.ConvertType(E->getType()));
494 else
495 // Extract old value and emit it using the same type as compare value.
496 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
497 ValueType);
498}
499
500/// This function should be invoked to emit atomic cmpxchg for Microsoft's
501/// _InterlockedCompareExchange* intrinsics which have the following signature:
502/// T _InterlockedCompareExchange(T volatile *Destination,
503/// T Exchange,
504/// T Comparand);
505///
506/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
507/// cmpxchg *Destination, Comparand, Exchange.
508/// So we need to swap Comparand and Exchange when invoking
509/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
510/// function MakeAtomicCmpXchgValue since it expects the arguments to be
511/// already swapped.
512
// NOTE(review): the signature line carrying the function name and the opening
// lines of three asserts are missing from this listing.
//
// Returns the old value read from the destination (member 0 of the cmpxchg
// result). Pointer operands are converted to IntPtrTy for the instruction and
// the result is converted back to the pointer type.
513static
515 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
516 assert(E->getArg(0)->getType()->isPointerType());
518 E->getType(), E->getArg(0)->getType()->getPointeeType()));
520 E->getArg(1)->getType()));
522 E->getArg(2)->getType()));
523
524 Address DestAddr = CheckAtomicAlignment(CGF, E);
525
526 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
527 auto *RTy = Exchange->getType();
528
529 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
530
531 if (RTy->isPointerTy()) {
532 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
533 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
534 }
535
536 // For Release ordering, the failure ordering should be Monotonic.
537 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
538 AtomicOrdering::Monotonic :
539 SuccessOrdering;
540
541 // The atomic instruction is marked volatile for consistency with MSVC. This
542 // blocks the few atomics optimizations that LLVM has. If we want to optimize
543 // _Interlocked* operations in the future, we will have to remove the volatile
544 // marker.
545 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
546 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
547 CmpXchg->setVolatile(true);
548
549 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
550 if (RTy->isPointerTy()) {
551 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
552 }
553
554 return Result;
555}
556
557// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
558// prototyped like this:
559//
560// unsigned char _InterlockedCompareExchange128...(
561// __int64 volatile * _Destination,
562// __int64 _ExchangeHigh,
563// __int64 _ExchangeLow,
564// __int64 * _ComparandResult);
565//
566// Note that Destination is assumed to be at least 16-byte aligned, despite
567// being typed int64.
568
// NOTE(review): the first signature line and the final argument line of the
// `DestAddr` construction (presumably the alignment override - verify) are
// missing from this listing.
//
// The two 64-bit exchange halves are combined into one i128, the comparand is
// loaded as an i128 from argument 3, and the value observed by the cmpxchg is
// stored back to that same location. Returns the success flag zero-extended
// to i8.
570 const CallExpr *E,
571 AtomicOrdering SuccessOrdering) {
572 assert(E->getNumArgs() == 4);
573 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
574 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
575 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
576 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
577
578 assert(DestPtr->getType()->isPointerTy());
579 assert(!ExchangeHigh->getType()->isPointerTy());
580 assert(!ExchangeLow->getType()->isPointerTy());
581
582 // For Release ordering, the failure ordering should be Monotonic.
583 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
584 ? AtomicOrdering::Monotonic
585 : SuccessOrdering;
586
587 // Convert to i128 pointers and values. Alignment is also overridden for
588 // destination pointer.
589 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
590 Address DestAddr(DestPtr, Int128Ty,
592 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
593
594 // (((i128)hi) << 64) | ((i128)lo)
595 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
596 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
597 ExchangeHigh =
598 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
599 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
600
601 // Load the comparand for the instruction.
602 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
603
604 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
605 SuccessOrdering, FailureOrdering);
606
607 // The atomic instruction is marked volatile for consistency with MSVC. This
608 // blocks the few atomics optimizations that LLVM has. If we want to optimize
609 // _Interlocked* operations in the future, we will have to remove the volatile
610 // marker.
611 CXI->setVolatile(true);
612
613 // Store the result as an outparameter.
614 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
615 ComparandAddr);
616
617 // Get the success boolean and zero extend it to i8.
618 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
619 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
620}
621
// NOTE(review): the first signature line is missing from this listing.
//
// Emits `atomicrmw add 1` on the address given by call argument 0 with the
// requested ordering, then adds 1 to the instruction's result. Since
// atomicrmw yields the value held before the operation, the returned value is
// the incremented one.
623 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
624 assert(E->getArg(0)->getType()->isPointerType());
625
626 auto *IntTy = CGF.ConvertType(E->getType());
627 Address DestAddr = CheckAtomicAlignment(CGF, E);
628 auto *Result = CGF.Builder.CreateAtomicRMW(
629 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
630 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
631}
632
// NOTE(review): the first signature line is missing from this listing.
//
// Emits `atomicrmw sub 1` on the address given by call argument 0 with the
// requested ordering, then subtracts 1 from the instruction's result. Since
// atomicrmw yields the value held before the operation, the returned value is
// the decremented one.
634 CodeGenFunction &CGF, const CallExpr *E,
635 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
636 assert(E->getArg(0)->getType()->isPointerType());
637
638 auto *IntTy = CGF.ConvertType(E->getType());
639 Address DestAddr = CheckAtomicAlignment(CGF, E);
640 auto *Result = CGF.Builder.CreateAtomicRMW(
641 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
642 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
643}
644
645// Build a plain volatile load.
// NOTE(review): the signature line is missing from this listing.
// The load uses an integer type as wide as the pointee of call argument 0,
// and that same size is passed as the load's alignment.
647 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
648 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
649 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
650 llvm::Type *ITy =
651 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
652 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
653 Load->setVolatile(true);
654 return Load;
655}
656
657// Build a plain volatile store.
// NOTE(review): the signature line is missing from this listing.
// Stores call argument 1 through the pointer in call argument 0; the size of
// the pointee type is passed as the store's alignment.
659 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
660 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
661 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
662 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
663 llvm::StoreInst *Store =
664 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
665 Store->setVolatile(true);
666 return Store;
667}
668
669// Emit a simple mangled intrinsic that has 1 argument and a return type
670// matching the argument type. Depending on mode, this may be a constrained
671// floating-point intrinsic.
// NOTE(review): the first signature line is missing from this listing.
// The intrinsic is overloaded on the type of the single operand. The
// constrained variant is chosen when the builder is in FP-constrained mode,
// which is queried after the call's FP options are applied via FPOptsRAII.
673 const CallExpr *E, unsigned IntrinsicID,
674 unsigned ConstrainedIntrinsicID) {
675 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
676
677 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
678 if (CGF.Builder.getIsFPConstrained()) {
679 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
680 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
681 } else {
682 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
683 return CGF.Builder.CreateCall(F, Src0);
684 }
685}
686
687// Emit an intrinsic that has 2 operands of the same type as its result.
688// Depending on mode, this may be a constrained floating-point intrinsic.
// NOTE(review): the first signature line is missing from this listing.
// The intrinsic is overloaded on the type of the first operand only.
690 const CallExpr *E, unsigned IntrinsicID,
691 unsigned ConstrainedIntrinsicID) {
692 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
693 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
694
695 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
696 if (CGF.Builder.getIsFPConstrained()) {
697 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
698 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
699 } else {
700 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
701 return CGF.Builder.CreateCall(F, { Src0, Src1 });
702 }
703}
704
705// Has second type mangled argument.
// NOTE(review): the first signature line is missing from this listing.
// Emits a two-operand intrinsic overloaded on the types of both operands,
// using the constrained variant when the builder is in FP-constrained mode.
707 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
708 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
709 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
710 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
711
712 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
713 if (CGF.Builder.getIsFPConstrained()) {
714 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
715 {Src0->getType(), Src1->getType()});
716 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
717 }
718
719 Function *F =
720 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
721 return CGF.Builder.CreateCall(F, {Src0, Src1});
722}
723
724// Emit an intrinsic that has 3 operands of the same type as its result.
725// Depending on mode, this may be a constrained floating-point intrinsic.
// NOTE(review): the first signature line is missing from this listing.
// The intrinsic is overloaded on the type of the first operand only.
727 const CallExpr *E, unsigned IntrinsicID,
728 unsigned ConstrainedIntrinsicID) {
729 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
730 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
731 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
732
733 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
734 if (CGF.Builder.getIsFPConstrained()) {
735 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
736 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
737 } else {
738 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
739 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
740 }
741}
742
743// Emit an intrinsic where all operands are of the same type as the result.
744// Depending on mode, this may be a constrained floating-point intrinsic.
// NOTE(review): the first signature line is missing from this listing.
// Unlike the neighbouring helpers, the operands arrive already emitted in
// `Args` and the overload type is supplied by the caller as `Ty`. No
// CGFPOptionsRAII is set up in the visible body.
746 unsigned IntrinsicID,
747 unsigned ConstrainedIntrinsicID,
748 llvm::Type *Ty,
749 ArrayRef<Value *> Args) {
750 Function *F;
751 if (CGF.Builder.getIsFPConstrained())
752 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
753 else
754 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
755
756 if (CGF.Builder.getIsFPConstrained())
757 return CGF.Builder.CreateConstrainedFPCall(F, Args);
758 else
759 return CGF.Builder.CreateCall(F, Args);
760}
761
762// Emit a simple intrinsic that has N scalar arguments and a return type
763// matching the argument type. It is assumed that only the first argument is
764// overloaded.
// NOTE(review): the first signature line and the declaration of `Args` are
// missing from this listing.
// N is a compile-time argument count (must be non-zero); the first N call
// arguments are emitted as scalars and passed to the intrinsic, and `Name`
// is forwarded as the name of the created call.
765template <unsigned N>
767 const CallExpr *E,
768 unsigned IntrinsicID,
769 llvm::StringRef Name = "") {
770 static_assert(N, "expect non-empty argument");
772 for (unsigned I = 0; I < N; ++I)
773 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
774 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
775 return CGF.Builder.CreateCall(F, Args, Name);
776}
777
778// Emit an intrinsic that has 1 float or double operand, and 1 integer.
// NOTE(review): the first signature line is missing from this listing.
// The intrinsic is overloaded on the type of the first operand only; both
// operands are passed to the call.
780 const CallExpr *E,
781 unsigned IntrinsicID) {
782 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
783 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
784
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1});
787}
788
789// Emit an intrinsic that has overloaded integer result and fp operand.
// NOTE(review): the signature line carrying the function name is missing from
// this listing.
// The intrinsic is overloaded on both the call's converted result type and
// the operand type. Here the FP-constrained check is made before the
// CGFPOptionsRAII object is created, which is then only set up on the
// constrained path.
790static Value *
792 unsigned IntrinsicID,
793 unsigned ConstrainedIntrinsicID) {
794 llvm::Type *ResultType = CGF.ConvertType(E->getType());
795 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
796
797 if (CGF.Builder.getIsFPConstrained()) {
798 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
799 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
800 {ResultType, Src0->getType()});
801 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
802 } else {
803 Function *F =
804 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
805 return CGF.Builder.CreateCall(F, Src0);
806 }
807}
808
// NOTE(review): the first signature line is missing from this listing.
//
// Calls an intrinsic that returns a two-member aggregate for call argument 0.
// Member 1 is stored through the pointer in call argument 1 (using the
// natural alignment of its pointee type) and member 0 is returned. The
// intrinsic is overloaded on the operand type and on the pointee type of
// argument 1.
810 llvm::Intrinsic::ID IntrinsicID) {
811 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
812 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
813
814 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
815 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
816 llvm::Function *F =
817 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
818 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
819
820 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
821 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
822 CGF.EmitStoreOfScalar(Exp, LV);
823
824 return CGF.Builder.CreateExtractValue(Call, 0);
825}
826
// NOTE(review): the first signature line is missing from this listing.
//
// Calls an intrinsic that returns a two-member aggregate for call argument 0,
// then stores member 0 through the pointer in call argument 1 and member 1
// through the pointer in call argument 2. Both destinations are given the
// pointee type of argument 1 and its natural alignment. The visible body
// returns no value.
828 llvm::Intrinsic::ID IntrinsicID) {
829 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
830 llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
831 llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
832
833 llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
834 llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
835
836 llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
837 llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
838
839 QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
840 LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
841 LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
842
843 llvm::StoreInst *StoreSin =
844 CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
845 llvm::StoreInst *StoreCos =
846 CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
847
848 // Mark the two stores as non-aliasing with each other. The order of stores
849 // emitted by this builtin is arbitrary, enforcing a particular order will
850 // prevent optimizations later on.
// The sin store is placed in a fresh alias scope and the cos store is marked
// noalias with respect to that same scope.
851 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
852 MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
853 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
854 MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
855 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
856 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
857}
858
859/// EmitFAbs - Emit a call to @llvm.fabs().
/// The intrinsic is overloaded on the type of \p V, and the emitted call is
/// marked as not accessing memory. Returns the call instruction.
// NOTE(review): the signature line is not visible in this listing; the body
// uses a CodeGenFunction `CGF` and a value `V`.
861 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
862 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
863 Call->setDoesNotAccessMemory();
864 return Call;
865}
866
867/// Emit the computation of the sign bit for a floating point value. Returns
868/// the i1 sign bit value.
// NOTE(review): the signature line is not visible in this listing; the body
// uses a CodeGenFunction `CGF` and the floating point value `V`.
870 LLVMContext &C = CGF.CGM.getLLVMContext();
871
// Reinterpret the value as an integer of the same bit width so that the sign
// can be tested with an integer comparison.
872 llvm::Type *Ty = V->getType();
873 int Width = Ty->getPrimitiveSizeInBits();
874 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
875 V = CGF.Builder.CreateBitCast(V, IntTy);
876 if (Ty->isPPC_FP128Ty()) {
877 // We want the sign bit of the higher-order double. The bitcast we just
878 // did works as if the double-double was stored to memory and then
879 // read as an i128. The "store" will put the higher-order double in the
880 // lower address in both little- and big-Endian modes, but the "load"
881 // will treat those bits as a different part of the i128: the low bits in
882 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
883 // we need to shift the high bits down to the low before truncating.
// From here on only half of the original width is of interest.
884 Width >>= 1;
885 if (CGF.getTarget().isBigEndian()) {
886 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
887 V = CGF.Builder.CreateLShr(V, ShiftCst);
888 }
889 // We are truncating value in order to extract the higher-order
890 // double, which we will be using to extract the sign from.
891 IntTy = llvm::IntegerType::get(C, Width);
892 V = CGF.Builder.CreateTrunc(V, IntTy);
893 }
// The sign bit is set exactly when the integer, read as signed, is below zero.
894 Value *Zero = llvm::Constant::getNullValue(IntTy);
895 return CGF.Builder.CreateICmpSLT(V, Zero);
896}
897
898/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
899/// hidden pointer). This is used to check annotating FP libcalls (that could
900/// set `errno`) with "int" TBAA metadata is safe. If any floating-point
901/// arguments are passed indirectly, setup for the call could be incorrectly
902/// optimized out.
// NOTE(review): the signature line is not visible in this listing; the body
// reads the function's ABI information through `FnInfo`.
// An ABI slot counts as indirect when it is indirect, indirect-aliased, or
// passed in an inalloca area.
904 auto IsIndirect = [&](ABIArgInfo const &info) {
905 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
906 };
// True only when neither the return slot nor any argument slot is indirect.
907 return !IsIndirect(FnInfo.getReturnInfo()) &&
908 llvm::none_of(FnInfo.arguments(),
909 [&](CGFunctionInfoArgInfo const &ArgInfo) {
910 return IsIndirect(ArgInfo.info);
911 });
912}
913
// Emits a direct call to `calleeValue` for the call expression `E` and returns
// its result. When the callee `FD` is a builtin that passes the checks below,
// the emitted call instruction is additionally decorated with "int" TBAA
// metadata.
// NOTE(review): the opening line of this signature and two continuation lines
// further down are not visible in this listing; `CGF` and `FD` come from the
// missing signature line -- confirm against the full file.
915 const CallExpr *E, llvm::Constant *calleeValue) {
// Apply the floating-point options of the call expression while emitting it.
916 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
917 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
// Out-parameters filled in by EmitCall: the emitted call/invoke instruction
// and the ABI information that was used for it.
918 llvm::CallBase *callOrInvoke = nullptr;
919 CGFunctionInfo const *FnInfo = nullptr;
920 RValue Call =
921 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
922 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
923
924 if (unsigned BuiltinID = FD->getBuiltinID()) {
925 // Check whether a FP math builtin function, such as BI__builtin_expf
926 ASTContext &Context = CGF.getContext();
// NOTE(review): the initializer of this variable is on a line missing from
// this listing.
927 bool ConstWithoutErrnoAndExceptions =
929 // Restrict to target with errno, for example, MacOS doesn't set errno.
930 // TODO: Support builtin function with complex type returned, eg: cacosh
// NOTE(review): the last operand of this condition is on a line missing from
// this listing.
931 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
932 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
934 // Emit "int" TBAA metadata on FP math libcalls.
935 clang::QualType IntTy = Context.IntTy;
936 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
937 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
938 }
939 }
940 return Call;
941}
942
943/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
944/// depending on IntrinsicID.
945///
946/// \arg CGF The current codegen function.
947/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
948/// \arg X The first argument to the llvm.*.with.overflow.*.
949/// \arg Y The second argument to the llvm.*.with.overflow.*.
950/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
951/// \returns The result (i.e. sum/product) returned by the intrinsic.
952static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
953 const llvm::Intrinsic::ID IntrinsicID,
954 llvm::Value *X, llvm::Value *Y,
955 llvm::Value *&Carry) {
956 // Make sure we have integers of the same width.
957 assert(X->getType() == Y->getType() &&
958 "Arguments must be the same type. (Did you forget to make sure both "
959 "arguments have the same integer width?)");
960
961 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
962 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
963 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
964 return CGF.Builder.CreateExtractValue(Tmp, 0);
965}
966
967static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
968 int low, int high) {
969 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
970 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
971 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
972 Call->addRangeRetAttr(CR);
973 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
974 return Call;
975}
976
namespace {
/// Describes an integer type by its width and by whether it is signed.
struct WidthAndSignedness {
  /// Width of the integer type.
  unsigned Width;
  /// True when the integer type is signed.
  bool Signed;
};
} // namespace
983
// Returns the width and signedness of the integer type `Type`, with the width
// taken from `context.getIntWidth`. Asserts that `Type` is an integer type.
// NOTE(review): the line carrying the function name and the `context`
// parameter, and the line that declares `Signed`, are not visible in this
// listing -- confirm against the full file.
984static WidthAndSignedness
986 const clang::QualType Type) {
987 assert(Type->isIntegerType() && "Given type is not an integer.");
988 unsigned Width = context.getIntWidth(Type);
990 return {Width, Signed};
991}
992
993// Given one or more integer types, this function produces an integer type that
994// encompasses them: any value in one of the given types could be expressed in
995// the encompassing type.
996static struct WidthAndSignedness
997EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
998 assert(Types.size() > 0 && "Empty list of types.");
999
1000 // If any of the given types is signed, we must return a signed type.
1001 bool Signed = false;
1002 for (const auto &Type : Types) {
1003 Signed |= Type.Signed;
1004 }
1005
1006 // The encompassing type must have a width greater than or equal to the width
1007 // of the specified types. Additionally, if the encompassing type is signed,
1008 // its width must be strictly greater than the width of any unsigned types
1009 // given.
1010 unsigned Width = 0;
1011 for (const auto &Type : Types) {
1012 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
1013 if (Width < MinWidth) {
1014 Width = MinWidth;
1015 }
1016 }
1017
1018 return {Width, Signed};
1019}
1020
1021Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1022 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1023 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1024 ArgValue);
1025}
1026
/// Tells whether the result of __builtin_object_size(p, @p From) may be used
/// where __builtin_object_size(p, @p To) was requested.
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  if (From == To)
    return true;

  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
1035
1036static llvm::Value *
1037getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1038 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1039}
1040
1041llvm::Value *
1042CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1043 llvm::IntegerType *ResType,
1044 llvm::Value *EmittedE,
1045 bool IsDynamic) {
1046 uint64_t ObjectSize;
1047 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1048 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1049 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1050}
1051
// Searches the fields of `RD` for a flexible-array-like member. When `FAMDecl`
// is non-null only that field is accepted as a match. On success the matching
// field is returned and the offset of the field at the current nesting level
// is added to `Offset`; otherwise nullptr is returned. Implicit records are
// not searched.
// NOTE(review): the opening line of this signature and two lines of the body
// are not visible in this listing. `Offset` is presumably in bits (the caller
// converts it with toCharUnitsFromBits) -- confirm against the full file.
1053 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1054 uint64_t &Offset) {
1055 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1056 getLangOpts().getStrictFlexArraysLevel();
// Index of the current field, used to query the record layout.
1057 uint32_t FieldNo = 0;
1058
1059 if (RD->isImplicit())
1060 return nullptr;
1061
1062 for (const FieldDecl *FD : RD->fields()) {
// NOTE(review): the callee of the check below is on a line missing from this
// listing.
1063 if ((!FAMDecl || FD == FAMDecl) &&
1065 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1066 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1067 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1068 Offset += Layout.getFieldOffset(FieldNo);
1069 return FD;
1070 }
1071
// Look inside record-typed fields as well.
// NOTE(review): the line that declares `Field` and names the callee is missing
// from this listing.
1072 QualType Ty = FD->getType();
1073 if (Ty->isRecordType()) {
1075 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
// Add the offset of the enclosing field on top of what the nested search
// already accumulated.
1076 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1077 Offset += Layout.getFieldOffset(FieldNo);
1078 return Field;
1079 }
1080 }
1081
// The field index is only advanced for non-union records.
1082 if (!RD->isUnion())
1083 ++FieldNo;
1084 }
1085
1086 return nullptr;
1087}
1088
// Counts fields of `RD` whose type is count-attributed. The walk over `RD`'s
// own fields stops at the first such field.
// NOTE(review): the statement executed for record-typed fields is on a line
// missing from this listing; presumably it descends into the nested record --
// confirm against the full file.
1089static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1090 unsigned Num = 0;
1091
1092 for (const FieldDecl *FD : RD->fields()) {
// Return as soon as a count-attributed field is seen in this record.
1093 if (FD->getType()->isCountAttributedType())
1094 return ++Num;
1095
1096 QualType Ty = FD->getType();
1097 if (Ty->isRecordType())
1099 }
1100
1101 return Num;
1102}
1103
// Emits the size computation for __builtin_dynamic_object_size on a flexible
// array member carrying the counted_by attribute. Returns nullptr when the
// expression is not handled here, so that the caller can fall back to another
// strategy, and the default object-size result when the size is known to be
// indeterminable.
1104llvm::Value *
1105CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1106 llvm::IntegerType *ResType) {
1107 // The code generated here calculates the size of a struct with a flexible
1108 // array member that uses the counted_by attribute. There are three instances
1109 // to consider:
1110 //
1111 // struct s {
1112 // unsigned long flags;
1113 // int count;
1114 // int array[] __attribute__((counted_by(count)));
1115 // }
1116 //
1117 // 1) bdos of the flexible array itself:
1118 //
1119 // __builtin_dynamic_object_size(p->array, 1) ==
1120 // p->count * sizeof(*p->array)
1121 //
1122 // 2) bdos of a pointer into the flexible array:
1123 //
1124 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1125 // (p->count - 42) * sizeof(*p->array)
1126 //
1127 // 3) bdos of the whole struct, including the flexible array:
1128 //
1129 // __builtin_dynamic_object_size(p, 1) ==
1130 // max(sizeof(struct s),
1131 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1132 //
// Note that the third instance is not computed by the code below: when the
// base is a plain reference to the whole struct this function gives up and
// returns nullptr (see the isa<DeclRefExpr> check further down).
1133 ASTContext &Ctx = getContext();
1134 const Expr *Base = E->IgnoreParenImpCasts();
1135 const Expr *Idx = nullptr;
1136
// Peel off a leading address-of. For '&base[idx]' remember the index
// separately; for any other operand continue with the operand itself.
1137 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1138 UO && UO->getOpcode() == UO_AddrOf) {
1139 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1140 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1141 Base = ASE->getBase()->IgnoreParenImpCasts();
1142 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1143
1144 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
// A negative literal index yields the default result right away.
1145 int64_t Val = IL->getValue().getSExtValue();
1146 if (Val < 0)
1147 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1148
1149 if (Val == 0)
1150 // The index is 0, so we don't need to take it into account.
1151 Idx = nullptr;
1152 }
1153 } else {
1154 // Potential pointer to another element in the struct.
1155 Base = SubExpr;
1156 }
1157 }
1158
1159 // Get the flexible array member Decl.
1160 const RecordDecl *OuterRD = nullptr;
1161 const FieldDecl *FAMDecl = nullptr;
1162 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1163 // Check if \p Base is referencing the FAM itself.
// NOTE(review): one line of this branch is missing from this listing.
1164 const ValueDecl *VD = ME->getMemberDecl();
1166 FAMDecl = dyn_cast<FieldDecl>(VD);
1167 if (!FAMDecl)
1168 return nullptr;
1169 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1170 // Check if we're pointing to the whole struct.
1171 QualType Ty = DRE->getDecl()->getType();
1172 if (Ty->isPointerType())
1173 Ty = Ty->getPointeeType();
1174 OuterRD = Ty->getAsRecordDecl();
1175
1176 // If we have a situation like this:
1177 //
1178 // struct union_of_fams {
1179 // int flags;
1180 // union {
1181 // signed char normal_field;
1182 // struct {
1183 // int count1;
1184 // int arr1[] __counted_by(count1);
1185 // };
1186 // struct {
1187 // signed char count2;
1188 // int arr2[] __counted_by(count2);
1189 // };
1190 // };
1191 // };
1192 //
1193 // We don't know which 'count' to use in this scenario:
1194 //
1195 // size_t get_size(struct union_of_fams *p) {
1196 // return __builtin_dynamic_object_size(p, 1);
1197 // }
1198 //
1199 // Instead of calculating a wrong number, we give up.
1200 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1201 return nullptr;
1202 }
1203
1204 if (!OuterRD)
1205 return nullptr;
1206
1207 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1208 // get its offset.
1209 uint64_t Offset = 0;
1210 FAMDecl =
1211 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
// The offset was accumulated in bits; convert it to characters.
1212 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1213
1214 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1215 // No flexible array member found or it doesn't have the "counted_by"
1216 // attribute.
1217 return nullptr;
1218
1219 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1220 if (!CountedByFD)
1221 // Can't find the field referenced by the "counted_by" attribute.
1222 return nullptr;
1223
1224 if (isa<DeclRefExpr>(Base))
1225 // The whole struct is specified in the __bdos. The calculation of the
1226 // whole size of the structure can be done in two ways:
1227 //
1228 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1229 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1230 //
1231 // The first will add additional padding after the end of the array
1232 // allocation, while the second method is more precise but not quite
1233 // what programmers expect. See
1234 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1235 // discussion of the topic.
1236 //
1237 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1238 // structure. Therefore, because of the above issue, we'll choose to match
1239 // what GCC does for consistency's sake.
1240 return nullptr;
1241
1242 // Build a load of the counted_by field.
1243 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1244 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1245 if (!CountedByInst)
1246 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1247
// Bring the count to the result type, extending according to its signedness.
1248 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1249
1250 // Build a load of the index and subtract it from the count.
1251 Value *IdxInst = nullptr;
1252 if (Idx) {
1253 if (Idx->HasSideEffects(getContext()))
1254 // We can't have side-effects.
1255 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1256
1257 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1258 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1259 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1260
1261 // We go ahead with the calculation here. If the index turns out to be
1262 // negative, we'll catch it at the end.
1263 CountedByInst =
1264 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1265 }
1266
1267 // Calculate how large the flexible array member is in bytes.
// NOTE(review): the declaration of `Size` is on a line missing from this
// listing.
1268 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1270 llvm::Constant *ElemSize =
1271 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1272 Value *Res =
1273 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1274 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1275
1276 // A negative \p IdxInst or \p CountedByInst means that the index lands
1277 // outside of the flexible array member. If that's the case, we want to
1278 // return 0.
1279 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1280 if (IdxInst)
1281 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1282
1283 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1284}
1285
1286/// Returns a Value corresponding to the size of the given expression.
1287/// This Value may be either of the following:
1288/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1289/// it)
1290/// - A call to the @llvm.objectsize intrinsic
1291///
1292/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1293/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1294/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1295llvm::Value *
1296CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1297 llvm::IntegerType *ResType,
1298 llvm::Value *EmittedE, bool IsDynamic) {
1299 // We need to reference an argument if the pointer is a parameter with the
1300 // pass_object_size attribute.
1301 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1302 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1303 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1304 if (Param != nullptr && PS != nullptr &&
1305 areBOSTypesCompatible(PS->getType(), Type)) {
1306 auto Iter = SizeArguments.find(Param);
1307 assert(Iter != SizeArguments.end());
1308
1309 const ImplicitParamDecl *D = Iter->second;
1310 auto DIter = LocalDeclMap.find(D);
1311 assert(DIter != LocalDeclMap.end());
1312
1313 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1314 getContext().getSizeType(), E->getBeginLoc());
1315 }
1316 }
1317
1318 if (IsDynamic) {
1319 // Emit special code for a flexible array member with the "counted_by"
1320 // attribute.
1321 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1322 return V;
1323 }
1324
1325 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1326 // evaluate E for side-effects. In either case, we shouldn't lower to
1327 // @llvm.objectsize.
1328 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1329 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1330
1331 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1332 assert(Ptr->getType()->isPointerTy() &&
1333 "Non-pointer passed to __builtin_object_size?");
1334
1335 Function *F =
1336 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1337
1338 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1339 Value *Min = Builder.getInt1((Type & 2) != 0);
1340 // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
1341 Value *NullIsUnknown = Builder.getTrue();
1342 Value *Dynamic = Builder.getInt1(IsDynamic);
1343 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1344}
1345
namespace {
/// Generic description of a bit test intrinsic: what it does to the tested
/// bit, how the access is interlocked, and whether it is a 64-bit variant.
struct BitTest {
  /// What happens to the tested bit.
  enum ActionKind : uint8_t {
    TestOnly,
    Complement,
    Reset,
    Set
  };

  /// How the access is interlocked.
  enum InterlockingKind : uint8_t {
    Unlocked,
    Sequential,
    Acquire,
    Release,
    NoFence
  };

  ActionKind Action;
  InterlockingKind Interlocking;
  bool Is64Bit;

  /// Translates a builtin ID into its description.
  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};

} // namespace
1366
1367BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1368 switch (BuiltinID) {
1369 // Main portable variants.
1370 case Builtin::BI_bittest:
1371 return {TestOnly, Unlocked, false};
1372 case Builtin::BI_bittestandcomplement:
1373 return {Complement, Unlocked, false};
1374 case Builtin::BI_bittestandreset:
1375 return {Reset, Unlocked, false};
1376 case Builtin::BI_bittestandset:
1377 return {Set, Unlocked, false};
1378 case Builtin::BI_interlockedbittestandreset:
1379 return {Reset, Sequential, false};
1380 case Builtin::BI_interlockedbittestandset:
1381 return {Set, Sequential, false};
1382
1383 // X86-specific 64-bit variants.
1384 case Builtin::BI_bittest64:
1385 return {TestOnly, Unlocked, true};
1386 case Builtin::BI_bittestandcomplement64:
1387 return {Complement, Unlocked, true};
1388 case Builtin::BI_bittestandreset64:
1389 return {Reset, Unlocked, true};
1390 case Builtin::BI_bittestandset64:
1391 return {Set, Unlocked, true};
1392 case Builtin::BI_interlockedbittestandreset64:
1393 return {Reset, Sequential, true};
1394 case Builtin::BI_interlockedbittestandset64:
1395 return {Set, Sequential, true};
1396
1397 // ARM/AArch64-specific ordering variants.
1398 case Builtin::BI_interlockedbittestandset_acq:
1399 return {Set, Acquire, false};
1400 case Builtin::BI_interlockedbittestandset_rel:
1401 return {Set, Release, false};
1402 case Builtin::BI_interlockedbittestandset_nf:
1403 return {Set, NoFence, false};
1404 case Builtin::BI_interlockedbittestandreset_acq:
1405 return {Reset, Acquire, false};
1406 case Builtin::BI_interlockedbittestandreset_rel:
1407 return {Reset, Release, false};
1408 case Builtin::BI_interlockedbittestandreset_nf:
1409 return {Reset, NoFence, false};
1410 }
1411 llvm_unreachable("expected only bittest intrinsics");
1412}
1413
1414static char bitActionToX86BTCode(BitTest::ActionKind A) {
1415 switch (A) {
1416 case BitTest::TestOnly: return '\0';
1417 case BitTest::Complement: return 'c';
1418 case BitTest::Reset: return 'r';
1419 case BitTest::Set: return 's';
1420 }
1421 llvm_unreachable("invalid action");
1422}
1423
// Emits the bit test described by `BT` as an x86 inline-assembly "bt"
// instruction operating on `BitBase` and `BitPos`, and returns the emitted
// call.
// NOTE(review): the opening line of this signature (which introduces `CGF`)
// and the declaration of the `Asm` buffer are not visible in this listing --
// confirm against the full file.
1425 BitTest BT,
1426 const CallExpr *E, Value *BitBase,
1427 Value *BitPos) {
1428 char Action = bitActionToX86BTCode(BT.Action);
1429 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1430
1431 // Build the assembly.
// The text has the form "[lock ]bt[c|r|s]{l|q} $2, ($1)".
1433 raw_svector_ostream AsmOS(Asm);
1434 if (BT.Interlocking != BitTest::Unlocked)
1435 AsmOS << "lock ";
1436 AsmOS << "bt";
1437 if (Action)
1438 AsmOS << Action;
1439 AsmOS << SizeSuffix << " $2, ($1)";
1440
1441 // Build the constraints. FIXME: We should support immediates when possible.
// Any target-specific clobbers are appended after the fixed constraints.
1442 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1443 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1444 if (!MachineClobbers.empty()) {
1445 Constraints += ',';
1446 Constraints += MachineClobbers;
1447 }
// The bit position operand gets an integer type as wide as the type of the
// call's second argument.
1448 llvm::IntegerType *IntType = llvm::IntegerType::get(
1449 CGF.getLLVMContext(),
1450 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1451 llvm::FunctionType *FTy =
1452 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1453
1454 llvm::InlineAsm *IA =
1455 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1456 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1457}
1458
1459static llvm::AtomicOrdering
1460getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1461 switch (I) {
1462 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1463 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1464 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1465 case BitTest::Release: return llvm::AtomicOrdering::Release;
1466 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1467 }
1468 llvm_unreachable("invalid interlocking");
1469}
1470
1471/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1472/// bits and a bit position and read and optionally modify the bit at that
1473/// position. The position index can be arbitrarily large, i.e. it can be larger
1474/// than 31 or 63, so we need an indexed load in the general case.
///
/// Returns an i8 value that is 0 or 1 on the generic path; on x86 the result
/// of the inline-assembly based lowering is returned instead.
1475static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1476 unsigned BuiltinID,
1477 const CallExpr *E) {
1478 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1479 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1480
1481 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1482
1483 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1484 // indexing operation internally. Use them if possible.
1485 if (CGF.getTarget().getTriple().isX86())
1486 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1487
1488 // Otherwise, use generic code to load one byte and test the bit. Use all but
1489 // the bottom three bits as the array index, and the bottom three bits to form
1490 // a mask.
1491 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
// The byte index is formed with an arithmetic shift, so the sign of BitPos
// carries over into the index.
1492 Value *ByteIndex = CGF.Builder.CreateAShr(
1493 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1494 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1495 "bittest.byteaddr"),
1496 CGF.Int8Ty, CharUnits::One());
// Position of the bit inside the addressed byte (0..7), as an i8.
1497 Value *PosLow =
1498 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1499 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1500
1501 // The updating instructions will need a mask.
1502 Value *Mask = nullptr;
1503 if (BT.Action != BitTest::TestOnly) {
1504 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1505 "bittest.mask");
1506 }
1507
1508 // Check the action and ordering of the interlocked intrinsics.
1509 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1510
1511 Value *OldByte = nullptr;
1512 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1513 // Emit a combined atomicrmw load/store operation for the interlocked
1514 // intrinsics.
// The decoded interlocked variants only ever set or reset a bit: setting uses
// an atomic 'or' with the mask, resetting an atomic 'and' with its complement.
1515 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1516 if (BT.Action == BitTest::Reset) {
1517 Mask = CGF.Builder.CreateNot(Mask);
1518 RMWOp = llvm::AtomicRMWInst::And;
1519 }
1520 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1521 } else {
1522 // Emit a plain load for the non-interlocked intrinsics.
1523 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1524 Value *NewByte = nullptr;
1525 switch (BT.Action) {
1526 case BitTest::TestOnly:
1527 // Don't store anything.
1528 break;
1529 case BitTest::Complement:
1530 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1531 break;
1532 case BitTest::Reset:
1533 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1534 break;
1535 case BitTest::Set:
1536 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1537 break;
1538 }
// Write the byte back only when the action modified it.
1539 if (NewByte)
1540 CGF.Builder.CreateStore(NewByte, ByteAddr);
1541 }
1542
1543 // However we loaded the old byte, either by plain load or atomicrmw, shift
1544 // the bit into the low position and mask it to 0 or 1.
1545 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1546 return CGF.Builder.CreateAnd(
1547 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1548}
1549
// Emits one of the PowerPC load-and-reserve builtins (ldarx, lwarx, lharx,
// lbarx) as an inline-assembly call on the address given as argument 0, and
// returns the emitted call. The result type follows the builtin: i64, i32,
// i16 or i8 respectively.
// NOTE(review): the opening line of this signature (which introduces `CGF`)
// and the declaration of the `Asm` buffer are not visible in this listing --
// confirm against the full file.
1551 unsigned BuiltinID,
1552 const CallExpr *E) {
1553 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1554
1556 raw_svector_ostream AsmOS(Asm);
1557 llvm::IntegerType *RetType = CGF.Int32Ty;
1558
// Select the mnemonic and the matching result type.
1559 switch (BuiltinID) {
1560 case clang::PPC::BI__builtin_ppc_ldarx:
1561 AsmOS << "ldarx ";
1562 RetType = CGF.Int64Ty;
1563 break;
1564 case clang::PPC::BI__builtin_ppc_lwarx:
1565 AsmOS << "lwarx ";
1566 RetType = CGF.Int32Ty;
1567 break;
1568 case clang::PPC::BI__builtin_ppc_lharx:
1569 AsmOS << "lharx ";
1570 RetType = CGF.Int16Ty;
1571 break;
1572 case clang::PPC::BI__builtin_ppc_lbarx:
1573 AsmOS << "lbarx ";
1574 RetType = CGF.Int8Ty;
1575 break;
1576 default:
1577 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1578 }
1579
// Operand 0 is the result, operand 1 the memory operand.
1580 AsmOS << "$0, ${1:y}";
1581
// Any target-specific clobbers are appended after the fixed constraints.
1582 std::string Constraints = "=r,*Z,~{memory}";
1583 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1584 if (!MachineClobbers.empty()) {
1585 Constraints += ',';
1586 Constraints += MachineClobbers;
1587 }
1588
1589 llvm::Type *PtrType = CGF.UnqualPtrTy;
1590 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1591
1592 llvm::InlineAsm *IA =
1593 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1594 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
// Attach the element type of the pointer operand, which is the result type.
1595 CI->addParamAttr(
1596 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1597 return CI;
1598}
1599
namespace {
/// Selects which MSVC CRT setjmp entry point is called.
enum class MSVCSetJmpKind { _setjmpex, _setjmp3, _setjmp };
} // namespace
1607
1608/// MSVC handles setjmp a bit differently on different platforms. On every
1609/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1610/// parameters can be passed as variadic arguments, but we always pass none.
1611static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1612 const CallExpr *E) {
1613 llvm::Value *Arg1 = nullptr;
1614 llvm::Type *Arg1Ty = nullptr;
1615 StringRef Name;
1616 bool IsVarArg = false;
1617 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1618 Name = "_setjmp3";
1619 Arg1Ty = CGF.Int32Ty;
1620 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1621 IsVarArg = true;
1622 } else {
1623 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1624 Arg1Ty = CGF.Int8PtrTy;
1625 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1626 Arg1 = CGF.Builder.CreateCall(
1627 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1628 } else
1629 Arg1 = CGF.Builder.CreateCall(
1630 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1631 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1632 }
1633
1634 // Mark the call site and declaration with ReturnsTwice.
1635 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1636 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1637 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1638 llvm::Attribute::ReturnsTwice);
1639 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1640 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1641 ReturnsTwiceAttr, /*Local=*/true);
1642
1643 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1644 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1645 llvm::Value *Args[] = {Buf, Arg1};
1646 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1647 CB->setAttributes(ReturnsTwiceAttr);
1648 return RValue::get(CB);
1649}
1650
1651// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
1652// we handle them here.
1693 __fastfail,
1694};
1695
1696static std::optional<CodeGenFunction::MSVCIntrin>
1697translateArmToMsvcIntrin(unsigned BuiltinID) {
1698 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1699 switch (BuiltinID) {
1700 default:
1701 return std::nullopt;
1702 case clang::ARM::BI_BitScanForward:
1703 case clang::ARM::BI_BitScanForward64:
1704 return MSVCIntrin::_BitScanForward;
1705 case clang::ARM::BI_BitScanReverse:
1706 case clang::ARM::BI_BitScanReverse64:
1707 return MSVCIntrin::_BitScanReverse;
1708 case clang::ARM::BI_InterlockedAnd64:
1709 return MSVCIntrin::_InterlockedAnd;
1710 case clang::ARM::BI_InterlockedExchange64:
1711 return MSVCIntrin::_InterlockedExchange;
1712 case clang::ARM::BI_InterlockedExchangeAdd64:
1713 return MSVCIntrin::_InterlockedExchangeAdd;
1714 case clang::ARM::BI_InterlockedExchangeSub64:
1715 return MSVCIntrin::_InterlockedExchangeSub;
1716 case clang::ARM::BI_InterlockedOr64:
1717 return MSVCIntrin::_InterlockedOr;
1718 case clang::ARM::BI_InterlockedXor64:
1719 return MSVCIntrin::_InterlockedXor;
1720 case clang::ARM::BI_InterlockedDecrement64:
1721 return MSVCIntrin::_InterlockedDecrement;
1722 case clang::ARM::BI_InterlockedIncrement64:
1723 return MSVCIntrin::_InterlockedIncrement;
1724 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1725 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1726 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1727 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1728 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1729 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1730 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1731 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1732 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1733 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1734 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1735 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1736 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1737 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1738 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1739 case clang::ARM::BI_InterlockedExchange8_acq:
1740 case clang::ARM::BI_InterlockedExchange16_acq:
1741 case clang::ARM::BI_InterlockedExchange_acq:
1742 case clang::ARM::BI_InterlockedExchange64_acq:
1743 case clang::ARM::BI_InterlockedExchangePointer_acq:
1744 return MSVCIntrin::_InterlockedExchange_acq;
1745 case clang::ARM::BI_InterlockedExchange8_rel:
1746 case clang::ARM::BI_InterlockedExchange16_rel:
1747 case clang::ARM::BI_InterlockedExchange_rel:
1748 case clang::ARM::BI_InterlockedExchange64_rel:
1749 case clang::ARM::BI_InterlockedExchangePointer_rel:
1750 return MSVCIntrin::_InterlockedExchange_rel;
1751 case clang::ARM::BI_InterlockedExchange8_nf:
1752 case clang::ARM::BI_InterlockedExchange16_nf:
1753 case clang::ARM::BI_InterlockedExchange_nf:
1754 case clang::ARM::BI_InterlockedExchange64_nf:
1755 case clang::ARM::BI_InterlockedExchangePointer_nf:
1756 return MSVCIntrin::_InterlockedExchange_nf;
1757 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1758 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1759 case clang::ARM::BI_InterlockedCompareExchange_acq:
1760 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1761 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1762 return MSVCIntrin::_InterlockedCompareExchange_acq;
1763 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1764 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1765 case clang::ARM::BI_InterlockedCompareExchange_rel:
1766 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1767 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1768 return MSVCIntrin::_InterlockedCompareExchange_rel;
1769 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1770 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1771 case clang::ARM::BI_InterlockedCompareExchange_nf:
1772 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1773 return MSVCIntrin::_InterlockedCompareExchange_nf;
1774 case clang::ARM::BI_InterlockedOr8_acq:
1775 case clang::ARM::BI_InterlockedOr16_acq:
1776 case clang::ARM::BI_InterlockedOr_acq:
1777 case clang::ARM::BI_InterlockedOr64_acq:
1778 return MSVCIntrin::_InterlockedOr_acq;
1779 case clang::ARM::BI_InterlockedOr8_rel:
1780 case clang::ARM::BI_InterlockedOr16_rel:
1781 case clang::ARM::BI_InterlockedOr_rel:
1782 case clang::ARM::BI_InterlockedOr64_rel:
1783 return MSVCIntrin::_InterlockedOr_rel;
1784 case clang::ARM::BI_InterlockedOr8_nf:
1785 case clang::ARM::BI_InterlockedOr16_nf:
1786 case clang::ARM::BI_InterlockedOr_nf:
1787 case clang::ARM::BI_InterlockedOr64_nf:
1788 return MSVCIntrin::_InterlockedOr_nf;
1789 case clang::ARM::BI_InterlockedXor8_acq:
1790 case clang::ARM::BI_InterlockedXor16_acq:
1791 case clang::ARM::BI_InterlockedXor_acq:
1792 case clang::ARM::BI_InterlockedXor64_acq:
1793 return MSVCIntrin::_InterlockedXor_acq;
1794 case clang::ARM::BI_InterlockedXor8_rel:
1795 case clang::ARM::BI_InterlockedXor16_rel:
1796 case clang::ARM::BI_InterlockedXor_rel:
1797 case clang::ARM::BI_InterlockedXor64_rel:
1798 return MSVCIntrin::_InterlockedXor_rel;
1799 case clang::ARM::BI_InterlockedXor8_nf:
1800 case clang::ARM::BI_InterlockedXor16_nf:
1801 case clang::ARM::BI_InterlockedXor_nf:
1802 case clang::ARM::BI_InterlockedXor64_nf:
1803 return MSVCIntrin::_InterlockedXor_nf;
1804 case clang::ARM::BI_InterlockedAnd8_acq:
1805 case clang::ARM::BI_InterlockedAnd16_acq:
1806 case clang::ARM::BI_InterlockedAnd_acq:
1807 case clang::ARM::BI_InterlockedAnd64_acq:
1808 return MSVCIntrin::_InterlockedAnd_acq;
1809 case clang::ARM::BI_InterlockedAnd8_rel:
1810 case clang::ARM::BI_InterlockedAnd16_rel:
1811 case clang::ARM::BI_InterlockedAnd_rel:
1812 case clang::ARM::BI_InterlockedAnd64_rel:
1813 return MSVCIntrin::_InterlockedAnd_rel;
1814 case clang::ARM::BI_InterlockedAnd8_nf:
1815 case clang::ARM::BI_InterlockedAnd16_nf:
1816 case clang::ARM::BI_InterlockedAnd_nf:
1817 case clang::ARM::BI_InterlockedAnd64_nf:
1818 return MSVCIntrin::_InterlockedAnd_nf;
1819 case clang::ARM::BI_InterlockedIncrement16_acq:
1820 case clang::ARM::BI_InterlockedIncrement_acq:
1821 case clang::ARM::BI_InterlockedIncrement64_acq:
1822 return MSVCIntrin::_InterlockedIncrement_acq;
1823 case clang::ARM::BI_InterlockedIncrement16_rel:
1824 case clang::ARM::BI_InterlockedIncrement_rel:
1825 case clang::ARM::BI_InterlockedIncrement64_rel:
1826 return MSVCIntrin::_InterlockedIncrement_rel;
1827 case clang::ARM::BI_InterlockedIncrement16_nf:
1828 case clang::ARM::BI_InterlockedIncrement_nf:
1829 case clang::ARM::BI_InterlockedIncrement64_nf:
1830 return MSVCIntrin::_InterlockedIncrement_nf;
1831 case clang::ARM::BI_InterlockedDecrement16_acq:
1832 case clang::ARM::BI_InterlockedDecrement_acq:
1833 case clang::ARM::BI_InterlockedDecrement64_acq:
1834 return MSVCIntrin::_InterlockedDecrement_acq;
1835 case clang::ARM::BI_InterlockedDecrement16_rel:
1836 case clang::ARM::BI_InterlockedDecrement_rel:
1837 case clang::ARM::BI_InterlockedDecrement64_rel:
1838 return MSVCIntrin::_InterlockedDecrement_rel;
1839 case clang::ARM::BI_InterlockedDecrement16_nf:
1840 case clang::ARM::BI_InterlockedDecrement_nf:
1841 case clang::ARM::BI_InterlockedDecrement64_nf:
1842 return MSVCIntrin::_InterlockedDecrement_nf;
1843 }
1844 llvm_unreachable("must return from switch");
1845}
1846
1847static std::optional<CodeGenFunction::MSVCIntrin>
1848translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1849 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1850 switch (BuiltinID) {
1851 default:
1852 return std::nullopt;
1853 case clang::AArch64::BI_BitScanForward:
1854 case clang::AArch64::BI_BitScanForward64:
1855 return MSVCIntrin::_BitScanForward;
1856 case clang::AArch64::BI_BitScanReverse:
1857 case clang::AArch64::BI_BitScanReverse64:
1858 return MSVCIntrin::_BitScanReverse;
1859 case clang::AArch64::BI_InterlockedAnd64:
1860 return MSVCIntrin::_InterlockedAnd;
1861 case clang::AArch64::BI_InterlockedExchange64:
1862 return MSVCIntrin::_InterlockedExchange;
1863 case clang::AArch64::BI_InterlockedExchangeAdd64:
1864 return MSVCIntrin::_InterlockedExchangeAdd;
1865 case clang::AArch64::BI_InterlockedExchangeSub64:
1866 return MSVCIntrin::_InterlockedExchangeSub;
1867 case clang::AArch64::BI_InterlockedOr64:
1868 return MSVCIntrin::_InterlockedOr;
1869 case clang::AArch64::BI_InterlockedXor64:
1870 return MSVCIntrin::_InterlockedXor;
1871 case clang::AArch64::BI_InterlockedDecrement64:
1872 return MSVCIntrin::_InterlockedDecrement;
1873 case clang::AArch64::BI_InterlockedIncrement64:
1874 return MSVCIntrin::_InterlockedIncrement;
1875 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1876 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1877 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1878 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1879 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1880 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1881 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1882 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1883 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1884 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1885 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1886 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1887 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1888 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1889 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1890 case clang::AArch64::BI_InterlockedExchange8_acq:
1891 case clang::AArch64::BI_InterlockedExchange16_acq:
1892 case clang::AArch64::BI_InterlockedExchange_acq:
1893 case clang::AArch64::BI_InterlockedExchange64_acq:
1894 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1895 return MSVCIntrin::_InterlockedExchange_acq;
1896 case clang::AArch64::BI_InterlockedExchange8_rel:
1897 case clang::AArch64::BI_InterlockedExchange16_rel:
1898 case clang::AArch64::BI_InterlockedExchange_rel:
1899 case clang::AArch64::BI_InterlockedExchange64_rel:
1900 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1901 return MSVCIntrin::_InterlockedExchange_rel;
1902 case clang::AArch64::BI_InterlockedExchange8_nf:
1903 case clang::AArch64::BI_InterlockedExchange16_nf:
1904 case clang::AArch64::BI_InterlockedExchange_nf:
1905 case clang::AArch64::BI_InterlockedExchange64_nf:
1906 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1907 return MSVCIntrin::_InterlockedExchange_nf;
1908 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1909 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1910 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1911 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1912 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1913 return MSVCIntrin::_InterlockedCompareExchange_acq;
1914 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1915 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1916 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1917 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1918 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1919 return MSVCIntrin::_InterlockedCompareExchange_rel;
1920 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1921 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1922 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1923 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1924 return MSVCIntrin::_InterlockedCompareExchange_nf;
1925 case clang::AArch64::BI_InterlockedCompareExchange128:
1926 return MSVCIntrin::_InterlockedCompareExchange128;
1927 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1928 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1929 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1930 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1931 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1932 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1933 case clang::AArch64::BI_InterlockedOr8_acq:
1934 case clang::AArch64::BI_InterlockedOr16_acq:
1935 case clang::AArch64::BI_InterlockedOr_acq:
1936 case clang::AArch64::BI_InterlockedOr64_acq:
1937 return MSVCIntrin::_InterlockedOr_acq;
1938 case clang::AArch64::BI_InterlockedOr8_rel:
1939 case clang::AArch64::BI_InterlockedOr16_rel:
1940 case clang::AArch64::BI_InterlockedOr_rel:
1941 case clang::AArch64::BI_InterlockedOr64_rel:
1942 return MSVCIntrin::_InterlockedOr_rel;
1943 case clang::AArch64::BI_InterlockedOr8_nf:
1944 case clang::AArch64::BI_InterlockedOr16_nf:
1945 case clang::AArch64::BI_InterlockedOr_nf:
1946 case clang::AArch64::BI_InterlockedOr64_nf:
1947 return MSVCIntrin::_InterlockedOr_nf;
1948 case clang::AArch64::BI_InterlockedXor8_acq:
1949 case clang::AArch64::BI_InterlockedXor16_acq:
1950 case clang::AArch64::BI_InterlockedXor_acq:
1951 case clang::AArch64::BI_InterlockedXor64_acq:
1952 return MSVCIntrin::_InterlockedXor_acq;
1953 case clang::AArch64::BI_InterlockedXor8_rel:
1954 case clang::AArch64::BI_InterlockedXor16_rel:
1955 case clang::AArch64::BI_InterlockedXor_rel:
1956 case clang::AArch64::BI_InterlockedXor64_rel:
1957 return MSVCIntrin::_InterlockedXor_rel;
1958 case clang::AArch64::BI_InterlockedXor8_nf:
1959 case clang::AArch64::BI_InterlockedXor16_nf:
1960 case clang::AArch64::BI_InterlockedXor_nf:
1961 case clang::AArch64::BI_InterlockedXor64_nf:
1962 return MSVCIntrin::_InterlockedXor_nf;
1963 case clang::AArch64::BI_InterlockedAnd8_acq:
1964 case clang::AArch64::BI_InterlockedAnd16_acq:
1965 case clang::AArch64::BI_InterlockedAnd_acq:
1966 case clang::AArch64::BI_InterlockedAnd64_acq:
1967 return MSVCIntrin::_InterlockedAnd_acq;
1968 case clang::AArch64::BI_InterlockedAnd8_rel:
1969 case clang::AArch64::BI_InterlockedAnd16_rel:
1970 case clang::AArch64::BI_InterlockedAnd_rel:
1971 case clang::AArch64::BI_InterlockedAnd64_rel:
1972 return MSVCIntrin::_InterlockedAnd_rel;
1973 case clang::AArch64::BI_InterlockedAnd8_nf:
1974 case clang::AArch64::BI_InterlockedAnd16_nf:
1975 case clang::AArch64::BI_InterlockedAnd_nf:
1976 case clang::AArch64::BI_InterlockedAnd64_nf:
1977 return MSVCIntrin::_InterlockedAnd_nf;
1978 case clang::AArch64::BI_InterlockedIncrement16_acq:
1979 case clang::AArch64::BI_InterlockedIncrement_acq:
1980 case clang::AArch64::BI_InterlockedIncrement64_acq:
1981 return MSVCIntrin::_InterlockedIncrement_acq;
1982 case clang::AArch64::BI_InterlockedIncrement16_rel:
1983 case clang::AArch64::BI_InterlockedIncrement_rel:
1984 case clang::AArch64::BI_InterlockedIncrement64_rel:
1985 return MSVCIntrin::_InterlockedIncrement_rel;
1986 case clang::AArch64::BI_InterlockedIncrement16_nf:
1987 case clang::AArch64::BI_InterlockedIncrement_nf:
1988 case clang::AArch64::BI_InterlockedIncrement64_nf:
1989 return MSVCIntrin::_InterlockedIncrement_nf;
1990 case clang::AArch64::BI_InterlockedDecrement16_acq:
1991 case clang::AArch64::BI_InterlockedDecrement_acq:
1992 case clang::AArch64::BI_InterlockedDecrement64_acq:
1993 return MSVCIntrin::_InterlockedDecrement_acq;
1994 case clang::AArch64::BI_InterlockedDecrement16_rel:
1995 case clang::AArch64::BI_InterlockedDecrement_rel:
1996 case clang::AArch64::BI_InterlockedDecrement64_rel:
1997 return MSVCIntrin::_InterlockedDecrement_rel;
1998 case clang::AArch64::BI_InterlockedDecrement16_nf:
1999 case clang::AArch64::BI_InterlockedDecrement_nf:
2000 case clang::AArch64::BI_InterlockedDecrement64_nf:
2001 return MSVCIntrin::_InterlockedDecrement_nf;
2002 }
2003 llvm_unreachable("must return from switch");
2004}
2005
2006static std::optional<CodeGenFunction::MSVCIntrin>
2007translateX86ToMsvcIntrin(unsigned BuiltinID) {
2008 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
2009 switch (BuiltinID) {
2010 default:
2011 return std::nullopt;
2012 case clang::X86::BI_BitScanForward:
2013 case clang::X86::BI_BitScanForward64:
2014 return MSVCIntrin::_BitScanForward;
2015 case clang::X86::BI_BitScanReverse:
2016 case clang::X86::BI_BitScanReverse64:
2017 return MSVCIntrin::_BitScanReverse;
2018 case clang::X86::BI_InterlockedAnd64:
2019 return MSVCIntrin::_InterlockedAnd;
2020 case clang::X86::BI_InterlockedCompareExchange128:
2021 return MSVCIntrin::_InterlockedCompareExchange128;
2022 case clang::X86::BI_InterlockedExchange64:
2023 return MSVCIntrin::_InterlockedExchange;
2024 case clang::X86::BI_InterlockedExchangeAdd64:
2025 return MSVCIntrin::_InterlockedExchangeAdd;
2026 case clang::X86::BI_InterlockedExchangeSub64:
2027 return MSVCIntrin::_InterlockedExchangeSub;
2028 case clang::X86::BI_InterlockedOr64:
2029 return MSVCIntrin::_InterlockedOr;
2030 case clang::X86::BI_InterlockedXor64:
2031 return MSVCIntrin::_InterlockedXor;
2032 case clang::X86::BI_InterlockedDecrement64:
2033 return MSVCIntrin::_InterlockedDecrement;
2034 case clang::X86::BI_InterlockedIncrement64:
2035 return MSVCIntrin::_InterlockedIncrement;
2036 }
2037 llvm_unreachable("must return from switch");
2038}
2039
2040// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2041Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2042 const CallExpr *E) {
2043 switch (BuiltinID) {
2044 case MSVCIntrin::_BitScanForward:
2045 case MSVCIntrin::_BitScanReverse: {
2046 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2047 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2048
2049 llvm::Type *ArgType = ArgValue->getType();
2050 llvm::Type *IndexType = IndexAddress.getElementType();
2051 llvm::Type *ResultType = ConvertType(E->getType());
2052
2053 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2054 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2055 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2056
2057 BasicBlock *Begin = Builder.GetInsertBlock();
2058 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2059 Builder.SetInsertPoint(End);
2060 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2061
2062 Builder.SetInsertPoint(Begin);
2063 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2064 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2065 Builder.CreateCondBr(IsZero, End, NotZero);
2066 Result->addIncoming(ResZero, Begin);
2067
2068 Builder.SetInsertPoint(NotZero);
2069
2070 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2071 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2072 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2073 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2074 Builder.CreateStore(ZeroCount, IndexAddress, false);
2075 } else {
2076 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2077 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2078
2079 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2080 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2081 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2082 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2083 Builder.CreateStore(Index, IndexAddress, false);
2084 }
2085 Builder.CreateBr(End);
2086 Result->addIncoming(ResOne, NotZero);
2087
2088 Builder.SetInsertPoint(End);
2089 return Result;
2090 }
2091 case MSVCIntrin::_InterlockedAnd:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2093 case MSVCIntrin::_InterlockedExchange:
2094 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2095 case MSVCIntrin::_InterlockedExchangeAdd:
2096 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2097 case MSVCIntrin::_InterlockedExchangeSub:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2099 case MSVCIntrin::_InterlockedOr:
2100 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2101 case MSVCIntrin::_InterlockedXor:
2102 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2103 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2104 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2105 AtomicOrdering::Acquire);
2106 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2107 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2108 AtomicOrdering::Release);
2109 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2110 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2111 AtomicOrdering::Monotonic);
2112 case MSVCIntrin::_InterlockedExchange_acq:
2113 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2114 AtomicOrdering::Acquire);
2115 case MSVCIntrin::_InterlockedExchange_rel:
2116 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2117 AtomicOrdering::Release);
2118 case MSVCIntrin::_InterlockedExchange_nf:
2119 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2120 AtomicOrdering::Monotonic);
2121 case MSVCIntrin::_InterlockedCompareExchange:
2122 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2123 case MSVCIntrin::_InterlockedCompareExchange_acq:
2124 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2125 case MSVCIntrin::_InterlockedCompareExchange_rel:
2126 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2127 case MSVCIntrin::_InterlockedCompareExchange_nf:
2128 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2129 case MSVCIntrin::_InterlockedCompareExchange128:
2131 *this, E, AtomicOrdering::SequentiallyConsistent);
2132 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2133 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2134 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2135 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2136 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2137 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2138 case MSVCIntrin::_InterlockedOr_acq:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2140 AtomicOrdering::Acquire);
2141 case MSVCIntrin::_InterlockedOr_rel:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2143 AtomicOrdering::Release);
2144 case MSVCIntrin::_InterlockedOr_nf:
2145 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2146 AtomicOrdering::Monotonic);
2147 case MSVCIntrin::_InterlockedXor_acq:
2148 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2149 AtomicOrdering::Acquire);
2150 case MSVCIntrin::_InterlockedXor_rel:
2151 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2152 AtomicOrdering::Release);
2153 case MSVCIntrin::_InterlockedXor_nf:
2154 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2155 AtomicOrdering::Monotonic);
2156 case MSVCIntrin::_InterlockedAnd_acq:
2157 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2158 AtomicOrdering::Acquire);
2159 case MSVCIntrin::_InterlockedAnd_rel:
2160 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2161 AtomicOrdering::Release);
2162 case MSVCIntrin::_InterlockedAnd_nf:
2163 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2164 AtomicOrdering::Monotonic);
2165 case MSVCIntrin::_InterlockedIncrement_acq:
2166 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2167 case MSVCIntrin::_InterlockedIncrement_rel:
2168 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2169 case MSVCIntrin::_InterlockedIncrement_nf:
2170 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2171 case MSVCIntrin::_InterlockedDecrement_acq:
2172 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2173 case MSVCIntrin::_InterlockedDecrement_rel:
2174 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2175 case MSVCIntrin::_InterlockedDecrement_nf:
2176 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2177
2178 case MSVCIntrin::_InterlockedDecrement:
2179 return EmitAtomicDecrementValue(*this, E);
2180 case MSVCIntrin::_InterlockedIncrement:
2181 return EmitAtomicIncrementValue(*this, E);
2182
2183 case MSVCIntrin::__fastfail: {
2184 // Request immediate process termination from the kernel. The instruction
2185 // sequences to do this are documented on MSDN:
2186 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2187 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2188 StringRef Asm, Constraints;
2189 switch (ISA) {
2190 default:
2191 ErrorUnsupported(E, "__fastfail call for this architecture");
2192 break;
2193 case llvm::Triple::x86:
2194 case llvm::Triple::x86_64:
2195 Asm = "int $$0x29";
2196 Constraints = "{cx}";
2197 break;
2198 case llvm::Triple::thumb:
2199 Asm = "udf #251";
2200 Constraints = "{r0}";
2201 break;
2202 case llvm::Triple::aarch64:
2203 Asm = "brk #0xF003";
2204 Constraints = "{w0}";
2205 }
2206 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2207 llvm::InlineAsm *IA =
2208 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2209 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2210 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2211 llvm::Attribute::NoReturn);
2212 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2213 CI->setAttributes(NoReturnAttr);
2214 return CI;
2215 }
2216 }
2217 llvm_unreachable("Incorrect MSVC intrinsic!");
2218}
2219
2220namespace {
2221// ARC cleanup for __builtin_os_log_format
2222struct CallObjCArcUse final : EHScopeStack::Cleanup {
2223 CallObjCArcUse(llvm::Value *object) : object(object) {}
2224 llvm::Value *object;
2225
2226 void Emit(CodeGenFunction &CGF, Flags flags) override {
2227 CGF.EmitARCIntrinsicUse(object);
2228 }
2229};
2230}
2231
2233 BuiltinCheckKind Kind) {
2234 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2235 "Unsupported builtin check kind");
2236
2237 Value *ArgValue = EmitScalarExpr(E);
2238 if (!SanOpts.has(SanitizerKind::Builtin))
2239 return ArgValue;
2240
2241 SanitizerScope SanScope(this);
2242 Value *Cond = Builder.CreateICmpNE(
2243 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2244 EmitCheck(std::make_pair(Cond, SanitizerKind::SO_Builtin),
2245 SanitizerHandler::InvalidBuiltin,
2247 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2248 {});
2249 return ArgValue;
2250}
2251
2253 Value *ArgValue = EvaluateExprAsBool(E);
2254 if (!SanOpts.has(SanitizerKind::Builtin))
2255 return ArgValue;
2256
2257 SanitizerScope SanScope(this);
2258 EmitCheck(
2259 std::make_pair(ArgValue, SanitizerKind::SO_Builtin),
2260 SanitizerHandler::InvalidBuiltin,
2262 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2263 std::nullopt);
2264 return ArgValue;
2265}
2266
2267static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2268 return CGF.Builder.CreateBinaryIntrinsic(
2269 Intrinsic::abs, ArgValue,
2270 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2271}
2272
2274 bool SanitizeOverflow) {
2275 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2276
2277 // Try to eliminate overflow check.
2278 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2279 if (!VCI->isMinSignedValue())
2280 return EmitAbs(CGF, ArgValue, true);
2281 }
2282
2283 CodeGenFunction::SanitizerScope SanScope(&CGF);
2284
2285 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2286 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2287 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2288 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2289 Value *NotOverflow = CGF.Builder.CreateNot(
2290 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2291
2292 // TODO: support -ftrapv-handler.
2293 if (SanitizeOverflow) {
2294 CGF.EmitCheck({{NotOverflow, SanitizerKind::SO_SignedIntegerOverflow}},
2295 SanitizerHandler::NegateOverflow,
2296 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2298 {ArgValue});
2299 } else
2300 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2301
2302 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2303 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2304}
2305
2306/// Get the argument type for arguments to os_log_helper.
2308 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2309 return C.getCanonicalType(UnsignedTy);
2310}
2311
2314 CharUnits BufferAlignment) {
2315 ASTContext &Ctx = getContext();
2316
2318 {
2319 raw_svector_ostream OS(Name);
2320 OS << "__os_log_helper";
2321 OS << "_" << BufferAlignment.getQuantity();
2322 OS << "_" << int(Layout.getSummaryByte());
2323 OS << "_" << int(Layout.getNumArgsByte());
2324 for (const auto &Item : Layout.Items)
2325 OS << "_" << int(Item.getSizeByte()) << "_"
2326 << int(Item.getDescriptorByte());
2327 }
2328
2329 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2330 return F;
2331
2333 FunctionArgList Args;
2334 Args.push_back(ImplicitParamDecl::Create(
2335 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2337 ArgTys.emplace_back(Ctx.VoidPtrTy);
2338
2339 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2340 char Size = Layout.Items[I].getSizeByte();
2341 if (!Size)
2342 continue;
2343
2344 QualType ArgTy = getOSLogArgType(Ctx, Size);
2345 Args.push_back(ImplicitParamDecl::Create(
2346 Ctx, nullptr, SourceLocation(),
2347 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2349 ArgTys.emplace_back(ArgTy);
2350 }
2351
2352 QualType ReturnTy = Ctx.VoidTy;
2353
2354 // The helper function has linkonce_odr linkage to enable the linker to merge
2355 // identical functions. To ensure the merging always happens, 'noinline' is
2356 // attached to the function when compiling with -Oz.
2357 const CGFunctionInfo &FI =
2359 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2360 llvm::Function *Fn = llvm::Function::Create(
2361 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2362 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2363 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2365 Fn->setDoesNotThrow();
2366
2367 // Attach 'noinline' at -Oz.
2368 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2369 Fn->addFnAttr(llvm::Attribute::NoInline);
2370
2371 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2372 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2373
2374 // Create a scope with an artificial location for the body of this function.
2375 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2376
2377 CharUnits Offset;
2379 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2380 BufferAlignment);
2381 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2382 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2383 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2384 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2385
2386 unsigned I = 1;
2387 for (const auto &Item : Layout.Items) {
2389 Builder.getInt8(Item.getDescriptorByte()),
2390 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2392 Builder.getInt8(Item.getSizeByte()),
2393 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2394
2395 CharUnits Size = Item.size();
2396 if (!Size.getQuantity())
2397 continue;
2398
2399 Address Arg = GetAddrOfLocalVar(Args[I]);
2400 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2401 Addr = Addr.withElementType(Arg.getElementType());
2403 Offset += Size;
2404 ++I;
2405 }
2406
2408
2409 return Fn;
2410}
2411
2413 assert(E.getNumArgs() >= 2 &&
2414 "__builtin_os_log_format takes at least 2 arguments");
2415 ASTContext &Ctx = getContext();
2418 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2419 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2420
2421 // Ignore argument 1, the format string. It is not currently used.
2422 CallArgList Args;
2423 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2424
2425 for (const auto &Item : Layout.Items) {
2426 int Size = Item.getSizeByte();
2427 if (!Size)
2428 continue;
2429
2430 llvm::Value *ArgVal;
2431
2432 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2433 uint64_t Val = 0;
2434 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2435 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2436 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2437 } else if (const Expr *TheExpr = Item.getExpr()) {
2438 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2439
2440 // If a temporary object that requires destruction after the full
2441 // expression is passed, push a lifetime-extended cleanup to extend its
2442 // lifetime to the end of the enclosing block scope.
2443 auto LifetimeExtendObject = [&](const Expr *E) {
2444 E = E->IgnoreParenCasts();
2445 // Extend lifetimes of objects returned by function calls and message
2446 // sends.
2447
2448 // FIXME: We should do this in other cases in which temporaries are
2449 // created including arguments of non-ARC types (e.g., C++
2450 // temporaries).
2451 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2452 return true;
2453 return false;
2454 };
2455
2456 if (TheExpr->getType()->isObjCRetainableType() &&
2457 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2458 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2459 "Only scalar can be a ObjC retainable type");
2460 if (!isa<Constant>(ArgVal)) {
2461 CleanupKind Cleanup = getARCCleanupKind();
2462 QualType Ty = TheExpr->getType();
2464 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2465 ArgVal = EmitARCRetain(Ty, ArgVal);
2466 Builder.CreateStore(ArgVal, Addr);
2467 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2469 Cleanup & EHCleanup);
2470
2471 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2472 // argument has to be alive.
2473 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2474 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2475 }
2476 }
2477 } else {
2478 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2479 }
2480
2481 unsigned ArgValSize =
2482 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2483 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2484 ArgValSize);
2485 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2486 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2487 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2488 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2489 Args.add(RValue::get(ArgVal), ArgTy);
2490 }
2491
2492 const CGFunctionInfo &FI =
2495 Layout, BufAddr.getAlignment());
2497 return RValue::get(BufAddr, *this);
2498}
2499
2501 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2502 WidthAndSignedness ResultInfo) {
2503 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2504 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2505 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2506}
2507
2509 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2510 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2511 const clang::Expr *ResultArg, QualType ResultQTy,
2512 WidthAndSignedness ResultInfo) {
2514 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2515 "Cannot specialize this multiply");
2516
2517 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2518 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2519
2520 llvm::Value *HasOverflow;
2521 llvm::Value *Result = EmitOverflowIntrinsic(
2522 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2523
2524 // The intrinsic call will detect overflow when the value is > UINT_MAX,
2525 // however, since the original builtin had a signed result, we need to report
2526 // an overflow when the result is greater than INT_MAX.
2527 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2528 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2529
2530 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2531 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2532
2533 bool isVolatile =
2534 ResultArg->getType()->getPointeeType().isVolatileQualified();
2535 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2536 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2537 isVolatile);
2538 return RValue::get(HasOverflow);
2539}
2540
2541/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2542static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2543 WidthAndSignedness Op1Info,
2544 WidthAndSignedness Op2Info,
2545 WidthAndSignedness ResultInfo) {
2546 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2547 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2548 Op1Info.Signed != Op2Info.Signed;
2549}
2550
2551/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2552/// the generic checked-binop irgen.
2553static RValue
2555 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2556 WidthAndSignedness Op2Info,
2557 const clang::Expr *ResultArg, QualType ResultQTy,
2558 WidthAndSignedness ResultInfo) {
2559 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2560 Op2Info, ResultInfo) &&
2561 "Not a mixed-sign multipliction we can specialize");
2562
2563 // Emit the signed and unsigned operands.
2564 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2565 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2566 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2567 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2568 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2569 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2570
2571 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2572 if (SignedOpWidth < UnsignedOpWidth)
2573 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2574 if (UnsignedOpWidth < SignedOpWidth)
2575 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2576
2577 llvm::Type *OpTy = Signed->getType();
2578 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2579 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2580 llvm::Type *ResTy = ResultPtr.getElementType();
2581 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2582
2583 // Take the absolute value of the signed operand.
2584 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2585 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2586 llvm::Value *AbsSigned =
2587 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2588
2589 // Perform a checked unsigned multiplication.
2590 llvm::Value *UnsignedOverflow;
2591 llvm::Value *UnsignedResult =
2592 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2593 Unsigned, UnsignedOverflow);
2594
2595 llvm::Value *Overflow, *Result;
2596 if (ResultInfo.Signed) {
2597 // Signed overflow occurs if the result is greater than INT_MAX or less
2598 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2599 auto IntMax =
2600 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2601 llvm::Value *MaxResult =
2602 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2603 CGF.Builder.CreateZExt(IsNegative, OpTy));
2604 llvm::Value *SignedOverflow =
2605 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2606 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2607
2608 // Prepare the signed result (possibly by negating it).
2609 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2610 llvm::Value *SignedResult =
2611 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2612 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2613 } else {
2614 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2615 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2616 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2617 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2618 if (ResultInfo.Width < OpWidth) {
2619 auto IntMax =
2620 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2621 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2622 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2623 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2624 }
2625
2626 // Negate the product if it would be negative in infinite precision.
2627 Result = CGF.Builder.CreateSelect(
2628 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2629
2630 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2631 }
2632 assert(Overflow && Result && "Missing overflow or result");
2633
2634 bool isVolatile =
2635 ResultArg->getType()->getPointeeType().isVolatileQualified();
2636 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2637 isVolatile);
2638 return RValue::get(Overflow);
2639}
2640
2641static bool
2643 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2644 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2645 Ty = Ctx.getBaseElementType(Arr);
2646
2647 const auto *Record = Ty->getAsCXXRecordDecl();
2648 if (!Record)
2649 return false;
2650
2651 // We've already checked this type, or are in the process of checking it.
2652 if (!Seen.insert(Record).second)
2653 return false;
2654
2655 assert(Record->hasDefinition() &&
2656 "Incomplete types should already be diagnosed");
2657
2658 if (Record->isDynamicClass())
2659 return true;
2660
2661 for (FieldDecl *F : Record->fields()) {
2662 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2663 return true;
2664 }
2665 return false;
2666}
2667
2668/// Determine if the specified type requires laundering by checking if it is a
2669/// dynamic class type or contains a subobject which is a dynamic class type.
2671 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2672 return false;
2674 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2675}
2676
2677RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2678 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2679 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2680
2681 // The builtin's shift arg may have a different type than the source arg and
2682 // result, but the LLVM intrinsic uses the same type for all values.
2683 llvm::Type *Ty = Src->getType();
2684 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2685
2686 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2687 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2688 Function *F = CGM.getIntrinsic(IID, Ty);
2689 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2690}
2691
2692// Map math builtins for long-double to f128 version.
2693static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2694 switch (BuiltinID) {
2695#define MUTATE_LDBL(func) \
2696 case Builtin::BI__builtin_##func##l: \
2697 return Builtin::BI__builtin_##func##f128;
2728 MUTATE_LDBL(nans)
2729 MUTATE_LDBL(inf)
2748 MUTATE_LDBL(huge_val)
2758#undef MUTATE_LDBL
2759 default:
2760 return BuiltinID;
2761 }
2762}
2763
2764static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2765 Value *V) {
2766 if (CGF.Builder.getIsFPConstrained() &&
2767 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2768 if (Value *Result =
2769 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2770 return Result;
2771 }
2772 return nullptr;
2773}
2774
2776 const FunctionDecl *FD) {
2777 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2778 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2779 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2780
2782 for (auto &&FormalTy : FnTy->params())
2783 Args.push_back(llvm::PoisonValue::get(FormalTy));
2784
2785 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2786}
2787
2788RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2789 const CallExpr *E,
2790 ReturnValueSlot ReturnValue) {
2791 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2792 "Should not codegen for consteval builtins");
2793
2794 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2795 // See if we can constant fold this builtin. If so, don't emit it at all.
2796 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2799 !Result.hasSideEffects()) {
2800 if (Result.Val.isInt())
2801 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2802 Result.Val.getInt()));
2803 if (Result.Val.isFloat())
2804 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2805 Result.Val.getFloat()));
2806 }
2807
2808 // If current long-double semantics is IEEE 128-bit, replace math builtins
2809 // of long-double with f128 equivalent.
2810 // TODO: This mutation should also be applied to other targets other than PPC,
2811 // after backend supports IEEE 128-bit style libcalls.
2812 if (getTarget().getTriple().isPPC64() &&
2813 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2814 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2815
2816 // If the builtin has been declared explicitly with an assembler label,
2817 // disable the specialized emitting below. Ideally we should communicate the
2818 // rename in IR, or at least avoid generating the intrinsic calls that are
2819 // likely to get lowered to the renamed library functions.
2820 const unsigned BuiltinIDIfNoAsmLabel =
2821 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2822
2823 std::optional<bool> ErrnoOverriden;
2824 // ErrnoOverriden is true if math-errno is overridden via the
2825 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2826 // which implies math-errno.
2827 if (E->hasStoredFPFeatures()) {
2828 FPOptionsOverride OP = E->getFPFeatures();
2829 if (OP.hasMathErrnoOverride())
2830 ErrnoOverriden = OP.getMathErrnoOverride();
2831 }
2832 // True if '__attribute__((optnone))' is used. This attribute overrides
2833 // fast-math which implies math-errno.
2834 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2835
2836 // True if we are compiling at -O2 and errno has been disabled
2837 // using the '#pragma float_control(precise, off)', and
2838 // attribute opt-none hasn't been seen.
2839 bool ErrnoOverridenToFalseWithOpt =
2840 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2841 CGM.getCodeGenOpts().OptimizationLevel != 0;
2842
2843 // There are LLVM math intrinsics/instructions corresponding to math library
2844 // functions except the LLVM op will never set errno while the math library
2845 // might. Also, math builtins have the same semantics as their math library
2846 // twins. Thus, we can transform math library and builtin calls to their
2847 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2848 // In case FP exceptions are enabled, the experimental versions of the
2849 // intrinsics model those.
2850 bool ConstAlways =
2851 getContext().BuiltinInfo.isConst(BuiltinID);
2852
2853 // There's a special case with the fma builtins where they are always const
2854 // if the target environment is GNU or the target OS is Windows and we're
2855 // targeting the MSVCRT.dll environment.
2856 // FIXME: This list can become outdated. Need to find a way to get it some
2857 // other way.
2858 switch (BuiltinID) {
2859 case Builtin::BI__builtin_fma:
2860 case Builtin::BI__builtin_fmaf:
2861 case Builtin::BI__builtin_fmal:
2862 case Builtin::BI__builtin_fmaf16:
2863 case Builtin::BIfma:
2864 case Builtin::BIfmaf:
2865 case Builtin::BIfmal: {
2866 auto &Trip = CGM.getTriple();
2867 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2868 ConstAlways = true;
2869 break;
2870 }
2871 default:
2872 break;
2873 }
2874
2875 bool ConstWithoutErrnoAndExceptions =
2877 bool ConstWithoutExceptions =
2879
2880 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2881 // disabled.
2882 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2883 // or attributes that affect math-errno should prevent or allow math
2884 // intrinsics to be generated. Intrinsics are generated:
2885 // 1- In fast math mode, unless math-errno is overridden
2886 // via '#pragma float_control(precise, on)', or via an
2887 // '__attribute__((optnone))'.
2888 // 2- If math-errno was enabled on command line but overridden
2889 // to false via '#pragma float_control(precise, off)' and
2890 // '__attribute__((optnone))' hasn't been used.
2891 // 3- If we are compiling with optimization and errno has been disabled
2892 // via '#pragma float_control(precise, off)', and
2893 // '__attribute__((optnone))' hasn't been used.
2894
2895 bool ConstWithoutErrnoOrExceptions =
2896 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2897 bool GenerateIntrinsics =
2898 (ConstAlways && !OptNone) ||
2899 (!getLangOpts().MathErrno &&
2900 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2901 if (!GenerateIntrinsics) {
2902 GenerateIntrinsics =
2903 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2904 if (!GenerateIntrinsics)
2905 GenerateIntrinsics =
2906 ConstWithoutErrnoOrExceptions &&
2907 (!getLangOpts().MathErrno &&
2908 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2909 if (!GenerateIntrinsics)
2910 GenerateIntrinsics =
2911 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2912 }
2913 if (GenerateIntrinsics) {
2914 switch (BuiltinIDIfNoAsmLabel) {
2915 case Builtin::BIacos:
2916 case Builtin::BIacosf:
2917 case Builtin::BIacosl:
2918 case Builtin::BI__builtin_acos:
2919 case Builtin::BI__builtin_acosf:
2920 case Builtin::BI__builtin_acosf16:
2921 case Builtin::BI__builtin_acosl:
2922 case Builtin::BI__builtin_acosf128:
2924 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2925
2926 case Builtin::BIasin:
2927 case Builtin::BIasinf:
2928 case Builtin::BIasinl:
2929 case Builtin::BI__builtin_asin:
2930 case Builtin::BI__builtin_asinf:
2931 case Builtin::BI__builtin_asinf16:
2932 case Builtin::BI__builtin_asinl:
2933 case Builtin::BI__builtin_asinf128:
2935 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2936
2937 case Builtin::BIatan:
2938 case Builtin::BIatanf:
2939 case Builtin::BIatanl:
2940 case Builtin::BI__builtin_atan:
2941 case Builtin::BI__builtin_atanf:
2942 case Builtin::BI__builtin_atanf16:
2943 case Builtin::BI__builtin_atanl:
2944 case Builtin::BI__builtin_atanf128:
2946 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2947
2948 case Builtin::BIatan2:
2949 case Builtin::BIatan2f:
2950 case Builtin::BIatan2l:
2951 case Builtin::BI__builtin_atan2:
2952 case Builtin::BI__builtin_atan2f:
2953 case Builtin::BI__builtin_atan2f16:
2954 case Builtin::BI__builtin_atan2l:
2955 case Builtin::BI__builtin_atan2f128:
2957 *this, E, Intrinsic::atan2,
2958 Intrinsic::experimental_constrained_atan2));
2959
2960 case Builtin::BIceil:
2961 case Builtin::BIceilf:
2962 case Builtin::BIceill:
2963 case Builtin::BI__builtin_ceil:
2964 case Builtin::BI__builtin_ceilf:
2965 case Builtin::BI__builtin_ceilf16:
2966 case Builtin::BI__builtin_ceill:
2967 case Builtin::BI__builtin_ceilf128:
2969 Intrinsic::ceil,
2970 Intrinsic::experimental_constrained_ceil));
2971
2972 case Builtin::BIcopysign:
2973 case Builtin::BIcopysignf:
2974 case Builtin::BIcopysignl:
2975 case Builtin::BI__builtin_copysign:
2976 case Builtin::BI__builtin_copysignf:
2977 case Builtin::BI__builtin_copysignf16:
2978 case Builtin::BI__builtin_copysignl:
2979 case Builtin::BI__builtin_copysignf128:
2980 return RValue::get(
2981 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2982
2983 case Builtin::BIcos:
2984 case Builtin::BIcosf:
2985 case Builtin::BIcosl:
2986 case Builtin::BI__builtin_cos:
2987 case Builtin::BI__builtin_cosf:
2988 case Builtin::BI__builtin_cosf16:
2989 case Builtin::BI__builtin_cosl:
2990 case Builtin::BI__builtin_cosf128:
2992 Intrinsic::cos,
2993 Intrinsic::experimental_constrained_cos));
2994
2995 case Builtin::BIcosh:
2996 case Builtin::BIcoshf:
2997 case Builtin::BIcoshl:
2998 case Builtin::BI__builtin_cosh:
2999 case Builtin::BI__builtin_coshf:
3000 case Builtin::BI__builtin_coshf16:
3001 case Builtin::BI__builtin_coshl:
3002 case Builtin::BI__builtin_coshf128:
3004 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3005
3006 case Builtin::BIexp:
3007 case Builtin::BIexpf:
3008 case Builtin::BIexpl:
3009 case Builtin::BI__builtin_exp:
3010 case Builtin::BI__builtin_expf:
3011 case Builtin::BI__builtin_expf16:
3012 case Builtin::BI__builtin_expl:
3013 case Builtin::BI__builtin_expf128:
3015 Intrinsic::exp,
3016 Intrinsic::experimental_constrained_exp));
3017
3018 case Builtin::BIexp2:
3019 case Builtin::BIexp2f:
3020 case Builtin::BIexp2l:
3021 case Builtin::BI__builtin_exp2:
3022 case Builtin::BI__builtin_exp2f:
3023 case Builtin::BI__builtin_exp2f16:
3024 case Builtin::BI__builtin_exp2l:
3025 case Builtin::BI__builtin_exp2f128:
3027 Intrinsic::exp2,
3028 Intrinsic::experimental_constrained_exp2));
3029 case Builtin::BI__builtin_exp10:
3030 case Builtin::BI__builtin_exp10f:
3031 case Builtin::BI__builtin_exp10f16:
3032 case Builtin::BI__builtin_exp10l:
3033 case Builtin::BI__builtin_exp10f128: {
3034 // TODO: strictfp support
3035 if (Builder.getIsFPConstrained())
3036 break;
3037 return RValue::get(
3038 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3039 }
3040 case Builtin::BIfabs:
3041 case Builtin::BIfabsf:
3042 case Builtin::BIfabsl:
3043 case Builtin::BI__builtin_fabs:
3044 case Builtin::BI__builtin_fabsf:
3045 case Builtin::BI__builtin_fabsf16:
3046 case Builtin::BI__builtin_fabsl:
3047 case Builtin::BI__builtin_fabsf128:
3048 return RValue::get(
3049 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3050
3051 case Builtin::BIfloor:
3052 case Builtin::BIfloorf:
3053 case Builtin::BIfloorl:
3054 case Builtin::BI__builtin_floor:
3055 case Builtin::BI__builtin_floorf:
3056 case Builtin::BI__builtin_floorf16:
3057 case Builtin::BI__builtin_floorl:
3058 case Builtin::BI__builtin_floorf128:
3060 Intrinsic::floor,
3061 Intrinsic::experimental_constrained_floor));
3062
3063 case Builtin::BIfma:
3064 case Builtin::BIfmaf:
3065 case Builtin::BIfmal:
3066 case Builtin::BI__builtin_fma:
3067 case Builtin::BI__builtin_fmaf:
3068 case Builtin::BI__builtin_fmaf16:
3069 case Builtin::BI__builtin_fmal:
3070 case Builtin::BI__builtin_fmaf128:
3072 Intrinsic::fma,
3073 Intrinsic::experimental_constrained_fma));
3074
3075 case Builtin::BIfmax:
3076 case Builtin::BIfmaxf:
3077 case Builtin::BIfmaxl:
3078 case Builtin::BI__builtin_fmax:
3079 case Builtin::BI__builtin_fmaxf:
3080 case Builtin::BI__builtin_fmaxf16:
3081 case Builtin::BI__builtin_fmaxl:
3082 case Builtin::BI__builtin_fmaxf128:
3084 Intrinsic::maxnum,
3085 Intrinsic::experimental_constrained_maxnum));
3086
3087 case Builtin::BIfmin:
3088 case Builtin::BIfminf:
3089 case Builtin::BIfminl:
3090 case Builtin::BI__builtin_fmin:
3091 case Builtin::BI__builtin_fminf:
3092 case Builtin::BI__builtin_fminf16:
3093 case Builtin::BI__builtin_fminl:
3094 case Builtin::BI__builtin_fminf128:
3096 Intrinsic::minnum,
3097 Intrinsic::experimental_constrained_minnum));
3098
3099 case Builtin::BIfmaximum_num:
3100 case Builtin::BIfmaximum_numf:
3101 case Builtin::BIfmaximum_numl:
3102 case Builtin::BI__builtin_fmaximum_num:
3103 case Builtin::BI__builtin_fmaximum_numf:
3104 case Builtin::BI__builtin_fmaximum_numf16:
3105 case Builtin::BI__builtin_fmaximum_numl:
3106 case Builtin::BI__builtin_fmaximum_numf128:
3107 return RValue::get(
3108 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3109
3110 case Builtin::BIfminimum_num:
3111 case Builtin::BIfminimum_numf:
3112 case Builtin::BIfminimum_numl:
3113 case Builtin::BI__builtin_fminimum_num:
3114 case Builtin::BI__builtin_fminimum_numf:
3115 case Builtin::BI__builtin_fminimum_numf16:
3116 case Builtin::BI__builtin_fminimum_numl:
3117 case Builtin::BI__builtin_fminimum_numf128:
3118 return RValue::get(
3119 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3120
3121 // fmod() is a special-case. It maps to the frem instruction rather than an
3122 // LLVM intrinsic.
3123 case Builtin::BIfmod:
3124 case Builtin::BIfmodf:
3125 case Builtin::BIfmodl:
3126 case Builtin::BI__builtin_fmod:
3127 case Builtin::BI__builtin_fmodf:
3128 case Builtin::BI__builtin_fmodf16:
3129 case Builtin::BI__builtin_fmodl:
3130 case Builtin::BI__builtin_fmodf128:
3131 case Builtin::BI__builtin_elementwise_fmod: {
3132 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3133 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3134 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3135 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3136 }
3137
3138 case Builtin::BIlog:
3139 case Builtin::BIlogf:
3140 case Builtin::BIlogl:
3141 case Builtin::BI__builtin_log:
3142 case Builtin::BI__builtin_logf:
3143 case Builtin::BI__builtin_logf16:
3144 case Builtin::BI__builtin_logl:
3145 case Builtin::BI__builtin_logf128:
3147 Intrinsic::log,
3148 Intrinsic::experimental_constrained_log));
3149
3150 case Builtin::BIlog10:
3151 case Builtin::BIlog10f:
3152 case Builtin::BIlog10l:
3153 case Builtin::BI__builtin_log10:
3154 case Builtin::BI__builtin_log10f:
3155 case Builtin::BI__builtin_log10f16:
3156 case Builtin::BI__builtin_log10l:
3157 case Builtin::BI__builtin_log10f128:
3159 Intrinsic::log10,
3160 Intrinsic::experimental_constrained_log10));
3161
3162 case Builtin::BIlog2:
3163 case Builtin::BIlog2f:
3164 case Builtin::BIlog2l:
3165 case Builtin::BI__builtin_log2:
3166 case Builtin::BI__builtin_log2f:
3167 case Builtin::BI__builtin_log2f16:
3168 case Builtin::BI__builtin_log2l:
3169 case Builtin::BI__builtin_log2f128:
3171 Intrinsic::log2,
3172 Intrinsic::experimental_constrained_log2));
3173
3174 case Builtin::BInearbyint:
3175 case Builtin::BInearbyintf:
3176 case Builtin::BInearbyintl:
3177 case Builtin::BI__builtin_nearbyint:
3178 case Builtin::BI__builtin_nearbyintf:
3179 case Builtin::BI__builtin_nearbyintl:
3180 case Builtin::BI__builtin_nearbyintf128:
3182 Intrinsic::nearbyint,
3183 Intrinsic::experimental_constrained_nearbyint));
3184
3185 case Builtin::BIpow:
3186 case Builtin::BIpowf:
3187 case Builtin::BIpowl:
3188 case Builtin::BI__builtin_pow:
3189 case Builtin::BI__builtin_powf:
3190 case Builtin::BI__builtin_powf16:
3191 case Builtin::BI__builtin_powl:
3192 case Builtin::BI__builtin_powf128:
3194 Intrinsic::pow,
3195 Intrinsic::experimental_constrained_pow));
3196
3197 case Builtin::BIrint:
3198 case Builtin::BIrintf:
3199 case Builtin::BIrintl:
3200 case Builtin::BI__builtin_rint:
3201 case Builtin::BI__builtin_rintf:
3202 case Builtin::BI__builtin_rintf16:
3203 case Builtin::BI__builtin_rintl:
3204 case Builtin::BI__builtin_rintf128:
3206 Intrinsic::rint,
3207 Intrinsic::experimental_constrained_rint));
3208
3209 case Builtin::BIround:
3210 case Builtin::BIroundf:
3211 case Builtin::BIroundl:
3212 case Builtin::BI__builtin_round:
3213 case Builtin::BI__builtin_roundf:
3214 case Builtin::BI__builtin_roundf16:
3215 case Builtin::BI__builtin_roundl:
3216 case Builtin::BI__builtin_roundf128:
3218 Intrinsic::round,
3219 Intrinsic::experimental_constrained_round));
3220
3221 case Builtin::BIroundeven:
3222 case Builtin::BIroundevenf:
3223 case Builtin::BIroundevenl:
3224 case Builtin::BI__builtin_roundeven:
3225 case Builtin::BI__builtin_roundevenf:
3226 case Builtin::BI__builtin_roundevenf16:
3227 case Builtin::BI__builtin_roundevenl:
3228 case Builtin::BI__builtin_roundevenf128:
3230 Intrinsic::roundeven,
3231 Intrinsic::experimental_constrained_roundeven));
3232
3233 case Builtin::BIsin:
3234 case Builtin::BIsinf:
3235 case Builtin::BIsinl:
3236 case Builtin::BI__builtin_sin:
3237 case Builtin::BI__builtin_sinf:
3238 case Builtin::BI__builtin_sinf16:
3239 case Builtin::BI__builtin_sinl:
3240 case Builtin::BI__builtin_sinf128:
3242 Intrinsic::sin,
3243 Intrinsic::experimental_constrained_sin));
3244
3245 case Builtin::BIsinh:
3246 case Builtin::BIsinhf:
3247 case Builtin::BIsinhl:
3248 case Builtin::BI__builtin_sinh:
3249 case Builtin::BI__builtin_sinhf:
3250 case Builtin::BI__builtin_sinhf16:
3251 case Builtin::BI__builtin_sinhl:
3252 case Builtin::BI__builtin_sinhf128:
3254 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3255
3256 case Builtin::BI__builtin_sincos:
3257 case Builtin::BI__builtin_sincosf:
3258 case Builtin::BI__builtin_sincosf16:
3259 case Builtin::BI__builtin_sincosl:
3260 case Builtin::BI__builtin_sincosf128:
3261 emitSincosBuiltin(*this, E, Intrinsic::sincos);
3262 return RValue::get(nullptr);
3263
3264 case Builtin::BIsqrt:
3265 case Builtin::BIsqrtf:
3266 case Builtin::BIsqrtl:
3267 case Builtin::BI__builtin_sqrt:
3268 case Builtin::BI__builtin_sqrtf:
3269 case Builtin::BI__builtin_sqrtf16:
3270 case Builtin::BI__builtin_sqrtl:
3271 case Builtin::BI__builtin_sqrtf128:
3272 case Builtin::BI__builtin_elementwise_sqrt: {
3274 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3276 return RValue::get(Call);
3277 }
3278
3279 case Builtin::BItan:
3280 case Builtin::BItanf:
3281 case Builtin::BItanl:
3282 case Builtin::BI__builtin_tan:
3283 case Builtin::BI__builtin_tanf:
3284 case Builtin::BI__builtin_tanf16:
3285 case Builtin::BI__builtin_tanl:
3286 case Builtin::BI__builtin_tanf128:
3288 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3289
3290 case Builtin::BItanh:
3291 case Builtin::BItanhf:
3292 case Builtin::BItanhl:
3293 case Builtin::BI__builtin_tanh:
3294 case Builtin::BI__builtin_tanhf:
3295 case Builtin::BI__builtin_tanhf16:
3296 case Builtin::BI__builtin_tanhl:
3297 case Builtin::BI__builtin_tanhf128:
3299 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3300
3301 case Builtin::BItrunc:
3302 case Builtin::BItruncf:
3303 case Builtin::BItruncl:
3304 case Builtin::BI__builtin_trunc:
3305 case Builtin::BI__builtin_truncf:
3306 case Builtin::BI__builtin_truncf16:
3307 case Builtin::BI__builtin_truncl:
3308 case Builtin::BI__builtin_truncf128:
3310 Intrinsic::trunc,
3311 Intrinsic::experimental_constrained_trunc));
3312
3313 case Builtin::BIlround:
3314 case Builtin::BIlroundf:
3315 case Builtin::BIlroundl:
3316 case Builtin::BI__builtin_lround:
3317 case Builtin::BI__builtin_lroundf:
3318 case Builtin::BI__builtin_lroundl:
3319 case Builtin::BI__builtin_lroundf128:
3321 *this, E, Intrinsic::lround,
3322 Intrinsic::experimental_constrained_lround));
3323
3324 case Builtin::BIllround:
3325 case Builtin::BIllroundf:
3326 case Builtin::BIllroundl:
3327 case Builtin::BI__builtin_llround:
3328 case Builtin::BI__builtin_llroundf:
3329 case Builtin::BI__builtin_llroundl:
3330 case Builtin::BI__builtin_llroundf128:
3332 *this, E, Intrinsic::llround,
3333 Intrinsic::experimental_constrained_llround));
3334
3335 case Builtin::BIlrint:
3336 case Builtin::BIlrintf:
3337 case Builtin::BIlrintl:
3338 case Builtin::BI__builtin_lrint:
3339 case Builtin::BI__builtin_lrintf:
3340 case Builtin::BI__builtin_lrintl:
3341 case Builtin::BI__builtin_lrintf128:
3343 *this, E, Intrinsic::lrint,
3344 Intrinsic::experimental_constrained_lrint));
3345
3346 case Builtin::BIllrint:
3347 case Builtin::BIllrintf:
3348 case Builtin::BIllrintl:
3349 case Builtin::BI__builtin_llrint:
3350 case Builtin::BI__builtin_llrintf:
3351 case Builtin::BI__builtin_llrintl:
3352 case Builtin::BI__builtin_llrintf128:
3354 *this, E, Intrinsic::llrint,
3355 Intrinsic::experimental_constrained_llrint));
3356 case Builtin::BI__builtin_ldexp:
3357 case Builtin::BI__builtin_ldexpf:
3358 case Builtin::BI__builtin_ldexpl:
3359 case Builtin::BI__builtin_ldexpf16:
3360 case Builtin::BI__builtin_ldexpf128: {
3362 *this, E, Intrinsic::ldexp,
3363 Intrinsic::experimental_constrained_ldexp));
3364 }
3365 default:
3366 break;
3367 }
3368 }
3369
3370 // Check NonnullAttribute/NullabilityArg and Alignment.
3371 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3372 unsigned ParmNum) {
3373 Value *Val = A.emitRawPointer(*this);
3374 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3375 ParmNum);
3376
3377 if (SanOpts.has(SanitizerKind::Alignment)) {
3378 SanitizerSet SkippedChecks;
3379 SkippedChecks.set(SanitizerKind::All);
3380 SkippedChecks.clear(SanitizerKind::Alignment);
3381 SourceLocation Loc = Arg->getExprLoc();
3382 // Strip an implicit cast.
3383 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3384 if (CE->getCastKind() == CK_BitCast)
3385 Arg = CE->getSubExpr();
3386 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3387 SkippedChecks);
3388 }
3389 };
3390
3391 switch (BuiltinIDIfNoAsmLabel) {
3392 default: break;
3393 case Builtin::BI__builtin___CFStringMakeConstantString:
3394 case Builtin::BI__builtin___NSStringMakeConstantString:
3395 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3396 case Builtin::BI__builtin_stdarg_start:
3397 case Builtin::BI__builtin_va_start:
3398 case Builtin::BI__va_start:
3399 case Builtin::BI__builtin_va_end:
3400 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3401 ? EmitScalarExpr(E->getArg(0))
3402 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3403 BuiltinID != Builtin::BI__builtin_va_end);
3404 return RValue::get(nullptr);
3405 case Builtin::BI__builtin_va_copy: {
3406 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3407 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3408 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3409 {DstPtr, SrcPtr});
3410 return RValue::get(nullptr);
3411 }
3412 case Builtin::BIabs:
3413 case Builtin::BIlabs:
3414 case Builtin::BIllabs:
3415 case Builtin::BI__builtin_abs:
3416 case Builtin::BI__builtin_labs:
3417 case Builtin::BI__builtin_llabs: {
3418 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3419
3420 Value *Result;
3421 switch (getLangOpts().getSignedOverflowBehavior()) {
3423 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3424 break;
3426 if (!SanitizeOverflow) {
3427 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3428 break;
3429 }
3430 [[fallthrough]];
3432 // TODO: Somehow handle the corner case when the address of abs is taken.
3433 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3434 break;
3435 }
3436 return RValue::get(Result);
3437 }
3438 case Builtin::BI__builtin_complex: {
3439 Value *Real = EmitScalarExpr(E->getArg(0));
3440 Value *Imag = EmitScalarExpr(E->getArg(1));
3441 return RValue::getComplex({Real, Imag});
3442 }
3443 case Builtin::BI__builtin_conj:
3444 case Builtin::BI__builtin_conjf:
3445 case Builtin::BI__builtin_conjl:
3446 case Builtin::BIconj:
3447 case Builtin::BIconjf:
3448 case Builtin::BIconjl: {
3449 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3450 Value *Real = ComplexVal.first;
3451 Value *Imag = ComplexVal.second;
3452 Imag = Builder.CreateFNeg(Imag, "neg");
3453 return RValue::getComplex(std::make_pair(Real, Imag));
3454 }
3455 case Builtin::BI__builtin_creal:
3456 case Builtin::BI__builtin_crealf:
3457 case Builtin::BI__builtin_creall:
3458 case Builtin::BIcreal:
3459 case Builtin::BIcrealf:
3460 case Builtin::BIcreall: {
3461 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3462 return RValue::get(ComplexVal.first);
3463 }
3464
3465 case Builtin::BI__builtin_preserve_access_index: {
3466 // Only enable the preserved access index region when debuginfo
3467 // is available, as debuginfo is needed to preserve the user-level
3468 // access pattern.
3469 if (!getDebugInfo()) {
3470 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3471 return RValue::get(EmitScalarExpr(E->getArg(0)));
3472 }
3473
3474 // Nested builtin_preserve_access_index() not supported
3476 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3477 return RValue::get(EmitScalarExpr(E->getArg(0)));
3478 }
3479
3480 IsInPreservedAIRegion = true;
3481 Value *Res = EmitScalarExpr(E->getArg(0));
3482 IsInPreservedAIRegion = false;
3483 return RValue::get(Res);
3484 }
3485
3486 case Builtin::BI__builtin_cimag:
3487 case Builtin::BI__builtin_cimagf:
3488 case Builtin::BI__builtin_cimagl:
3489 case Builtin::BIcimag:
3490 case Builtin::BIcimagf:
3491 case Builtin::BIcimagl: {
3492 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3493 return RValue::get(ComplexVal.second);
3494 }
3495
3496 case Builtin::BI__builtin_clrsb:
3497 case Builtin::BI__builtin_clrsbl:
3498 case Builtin::BI__builtin_clrsbll: {
3499 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3500 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3501
3502 llvm::Type *ArgType = ArgValue->getType();
3503 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3504
3505 llvm::Type *ResultType = ConvertType(E->getType());
3506 Value *Zero = llvm::Constant::getNullValue(ArgType);
3507 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3508 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3509 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3510 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3511 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3512 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3513 "cast");
3514 return RValue::get(Result);
3515 }
3516 case Builtin::BI__builtin_ctzs:
3517 case Builtin::BI__builtin_ctz:
3518 case Builtin::BI__builtin_ctzl:
3519 case Builtin::BI__builtin_ctzll:
3520 case Builtin::BI__builtin_ctzg: {
3521 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3522 E->getNumArgs() > 1;
3523
3524 Value *ArgValue =
3525 HasFallback ? EmitScalarExpr(E->getArg(0))
3527
3528 llvm::Type *ArgType = ArgValue->getType();
3529 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3530
3531 llvm::Type *ResultType = ConvertType(E->getType());
3532 Value *ZeroUndef =
3533 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3534 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3535 if (Result->getType() != ResultType)
3536 Result =
3537 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3538 if (!HasFallback)
3539 return RValue::get(Result);
3540
3541 Value *Zero = Constant::getNullValue(ArgType);
3542 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3543 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3544 Value *ResultOrFallback =
3545 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3546 return RValue::get(ResultOrFallback);
3547 }
3548 case Builtin::BI__builtin_clzs:
3549 case Builtin::BI__builtin_clz:
3550 case Builtin::BI__builtin_clzl:
3551 case Builtin::BI__builtin_clzll:
3552 case Builtin::BI__builtin_clzg: {
3553 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3554 E->getNumArgs() > 1;
3555
3556 Value *ArgValue =
3557 HasFallback ? EmitScalarExpr(E->getArg(0))
3559
3560 llvm::Type *ArgType = ArgValue->getType();
3561 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3562
3563 llvm::Type *ResultType = ConvertType(E->getType());
3564 Value *ZeroUndef =
3565 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3566 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3567 if (Result->getType() != ResultType)
3568 Result =
3569 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3570 if (!HasFallback)
3571 return RValue::get(Result);
3572
3573 Value *Zero = Constant::getNullValue(ArgType);
3574 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3575 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3576 Value *ResultOrFallback =
3577 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3578 return RValue::get(ResultOrFallback);
3579 }
3580 case Builtin::BI__builtin_ffs:
3581 case Builtin::BI__builtin_ffsl:
3582 case Builtin::BI__builtin_ffsll: {
3583 // ffs(x) -> x ? cttz(x) + 1 : 0
3584 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3585
3586 llvm::Type *ArgType = ArgValue->getType();
3587 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3588
3589 llvm::Type *ResultType = ConvertType(E->getType());
3590 Value *Tmp =
3591 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3592 llvm::ConstantInt::get(ArgType, 1));
3593 Value *Zero = llvm::Constant::getNullValue(ArgType);
3594 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3595 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3596 if (Result->getType() != ResultType)
3597 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3598 "cast");
3599 return RValue::get(Result);
3600 }
3601 case Builtin::BI__builtin_parity:
3602 case Builtin::BI__builtin_parityl:
3603 case Builtin::BI__builtin_parityll: {
3604 // parity(x) -> ctpop(x) & 1
3605 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3606
3607 llvm::Type *ArgType = ArgValue->getType();
3608 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3609
3610 llvm::Type *ResultType = ConvertType(E->getType());
3611 Value *Tmp = Builder.CreateCall(F, ArgValue);
3612 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3613 if (Result->getType() != ResultType)
3614 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3615 "cast");
3616 return RValue::get(Result);
3617 }
3618 case Builtin::BI__lzcnt16:
3619 case Builtin::BI__lzcnt:
3620 case Builtin::BI__lzcnt64: {
3621 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3622
3623 llvm::Type *ArgType = ArgValue->getType();
3624 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3625
3626 llvm::Type *ResultType = ConvertType(E->getType());
3627 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3628 if (Result->getType() != ResultType)
3629 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3630 "cast");
3631 return RValue::get(Result);
3632 }
3633 case Builtin::BI__popcnt16:
3634 case Builtin::BI__popcnt:
3635 case Builtin::BI__popcnt64:
3636 case Builtin::BI__builtin_popcount:
3637 case Builtin::BI__builtin_popcountl:
3638 case Builtin::BI__builtin_popcountll:
3639 case Builtin::BI__builtin_popcountg: {
3640 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3641
3642 llvm::Type *ArgType = ArgValue->getType();
3643 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3644
3645 llvm::Type *ResultType = ConvertType(E->getType());
3646 Value *Result = Builder.CreateCall(F, ArgValue);
3647 if (Result->getType() != ResultType)
3648 Result =
3649 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3650 return RValue::get(Result);
3651 }
3652 case Builtin::BI__builtin_unpredictable: {
3653 // Always return the argument of __builtin_unpredictable. LLVM does not
3654 // handle this builtin. Metadata for this builtin should be added directly
3655 // to instructions such as branches or switches that use it.
3656 return RValue::get(EmitScalarExpr(E->getArg(0)));
3657 }
3658 case Builtin::BI__builtin_expect: {
3659 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3660 llvm::Type *ArgType = ArgValue->getType();
3661
3662 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3663 // Don't generate llvm.expect on -O0 as the backend won't use it for
3664 // anything.
3665 // Note, we still IRGen ExpectedValue because it could have side-effects.
3666 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3667 return RValue::get(ArgValue);
3668
3669 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3670 Value *Result =
3671 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3672 return RValue::get(Result);
3673 }
3674 case Builtin::BI__builtin_expect_with_probability: {
3675 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3676 llvm::Type *ArgType = ArgValue->getType();
3677
3678 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3679 llvm::APFloat Probability(0.0);
3680 const Expr *ProbArg = E->getArg(2);
3681 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3682 assert(EvalSucceed && "probability should be able to evaluate as float");
3683 (void)EvalSucceed;
3684 bool LoseInfo = false;
3685 Probability.convert(llvm::APFloat::IEEEdouble(),
3686 llvm::RoundingMode::Dynamic, &LoseInfo);
3687 llvm::Type *Ty = ConvertType(ProbArg->getType());
3688 Constant *Confidence = ConstantFP::get(Ty, Probability);
3689 // Don't generate llvm.expect.with.probability on -O0 as the backend
3690 // won't use it for anything.
3691 // Note, we still IRGen ExpectedValue because it could have side-effects.
3692 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3693 return RValue::get(ArgValue);
3694
3695 Function *FnExpect =
3696 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3697 Value *Result = Builder.CreateCall(
3698 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3699 return RValue::get(Result);
3700 }
3701 case Builtin::BI__builtin_assume_aligned: {
3702 const Expr *Ptr = E->getArg(0);
3703 Value *PtrValue = EmitScalarExpr(Ptr);
3704 Value *OffsetValue =
3705 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3706
3707 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3708 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3709 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3710 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3711 llvm::Value::MaximumAlignment);
3712
3713 emitAlignmentAssumption(PtrValue, Ptr,
3714 /*The expr loc is sufficient.*/ SourceLocation(),
3715 AlignmentCI, OffsetValue);
3716 return RValue::get(PtrValue);
3717 }
3718 case Builtin::BI__assume:
3719 case Builtin::BI__builtin_assume: {
3720 if (E->getArg(0)->HasSideEffects(getContext()))
3721 return RValue::get(nullptr);
3722
3723 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3724 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3725 Builder.CreateCall(FnAssume, ArgValue);
3726 return RValue::get(nullptr);
3727 }
3728 case Builtin::BI__builtin_assume_separate_storage: {
3729 const Expr *Arg0 = E->getArg(0);
3730 const Expr *Arg1 = E->getArg(1);
3731
3732 Value *Value0 = EmitScalarExpr(Arg0);
3733 Value *Value1 = EmitScalarExpr(Arg1);
3734
3735 Value *Values[] = {Value0, Value1};
3736 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3737 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3738 return RValue::get(nullptr);
3739 }
3740 case Builtin::BI__builtin_allow_runtime_check: {
3741 StringRef Kind =
3742 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3743 LLVMContext &Ctx = CGM.getLLVMContext();
3744 llvm::Value *Allow = Builder.CreateCall(
3745 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3746 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3747 return RValue::get(Allow);
3748 }
3749 case Builtin::BI__arithmetic_fence: {
3750 // Create the builtin call if FastMath is selected, and the target
3751 // supports the builtin, otherwise just return the argument.
3752 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3753 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3754 bool isArithmeticFenceEnabled =
3755 FMF.allowReassoc() &&
3757 QualType ArgType = E->getArg(0)->getType();
3758 if (ArgType->isComplexType()) {
3759 if (isArithmeticFenceEnabled) {
3760 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3761 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3762 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3763 ConvertType(ElementType));
3764 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3765 ConvertType(ElementType));
3766 return RValue::getComplex(std::make_pair(Real, Imag));
3767 }
3768 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3769 Value *Real = ComplexVal.first;
3770 Value *Imag = ComplexVal.second;
3771 return RValue::getComplex(std::make_pair(Real, Imag));
3772 }
3773 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3774 if (isArithmeticFenceEnabled)
3775 return RValue::get(
3776 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3777 return RValue::get(ArgValue);
3778 }
3779 case Builtin::BI__builtin_bswap16:
3780 case Builtin::BI__builtin_bswap32:
3781 case Builtin::BI__builtin_bswap64:
3782 case Builtin::BI_byteswap_ushort:
3783 case Builtin::BI_byteswap_ulong:
3784 case Builtin::BI_byteswap_uint64: {
3785 return RValue::get(
3786 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3787 }
3788 case Builtin::BI__builtin_bitreverse8:
3789 case Builtin::BI__builtin_bitreverse16:
3790 case Builtin::BI__builtin_bitreverse32:
3791 case Builtin::BI__builtin_bitreverse64: {
3792 return RValue::get(
3793 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3794 }
3795 case Builtin::BI__builtin_rotateleft8:
3796 case Builtin::BI__builtin_rotateleft16:
3797 case Builtin::BI__builtin_rotateleft32:
3798 case Builtin::BI__builtin_rotateleft64:
3799 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3800 case Builtin::BI_rotl16:
3801 case Builtin::BI_rotl:
3802 case Builtin::BI_lrotl:
3803 case Builtin::BI_rotl64:
3804 return emitRotate(E, false);
3805
3806 case Builtin::BI__builtin_rotateright8:
3807 case Builtin::BI__builtin_rotateright16:
3808 case Builtin::BI__builtin_rotateright32:
3809 case Builtin::BI__builtin_rotateright64:
3810 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3811 case Builtin::BI_rotr16:
3812 case Builtin::BI_rotr:
3813 case Builtin::BI_lrotr:
3814 case Builtin::BI_rotr64:
3815 return emitRotate(E, true);
3816
3817 case Builtin::BI__builtin_constant_p: {
3818 llvm::Type *ResultType = ConvertType(E->getType());
3819
3820 const Expr *Arg = E->getArg(0);
3821 QualType ArgType = Arg->getType();
3822 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3823 // and likely a mistake.
3824 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3825 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3826 // Per the GCC documentation, only numeric constants are recognized after
3827 // inlining.
3828 return RValue::get(ConstantInt::get(ResultType, 0));
3829
3830 if (Arg->HasSideEffects(getContext()))
3831 // The argument is unevaluated, so be conservative if it might have
3832 // side-effects.
3833 return RValue::get(ConstantInt::get(ResultType, 0));
3834
3835 Value *ArgValue = EmitScalarExpr(Arg);
3836 if (ArgType->isObjCObjectPointerType()) {
3837 // Convert Objective-C objects to id because we cannot distinguish between
3838 // LLVM types for Obj-C classes as they are opaque.
3839 ArgType = CGM.getContext().getObjCIdType();
3840 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3841 }
3842 Function *F =
3843 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3844 Value *Result = Builder.CreateCall(F, ArgValue);
3845 if (Result->getType() != ResultType)
3846 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3847 return RValue::get(Result);
3848 }
3849 case Builtin::BI__builtin_dynamic_object_size:
3850 case Builtin::BI__builtin_object_size: {
3851 unsigned Type =
3852 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3853 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3854
3855 // We pass this builtin onto the optimizer so that it can figure out the
3856 // object size in more complex cases.
3857 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3858 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3859 /*EmittedE=*/nullptr, IsDynamic));
3860 }
3861 case Builtin::BI__builtin_counted_by_ref: {
3862 // Default to returning '(void *) 0'.
3863 llvm::Value *Result = llvm::ConstantPointerNull::get(
3864 llvm::PointerType::getUnqual(getLLVMContext()));
3865
3866 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3867
3868 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3869 UO && UO->getOpcode() == UO_AddrOf) {
3870 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3871
3872 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3873 Arg = ASE->getBase()->IgnoreParenImpCasts();
3874 }
3875
3876 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3877 if (auto *CATy =
3878 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3879 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3880 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3881 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3882 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3883 else
3884 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3885 }
3886 }
3887
3888 return RValue::get(Result);
3889 }
3890 case Builtin::BI__builtin_prefetch: {
3891 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3892 // FIXME: Technically these constants should be of type 'int', yes?
3893 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3894 llvm::ConstantInt::get(Int32Ty, 0);
3895 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3896 llvm::ConstantInt::get(Int32Ty, 3);
3897 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3898 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3899 Builder.CreateCall(F, {Address, RW, Locality, Data});
3900 return RValue::get(nullptr);
3901 }
3902 case Builtin::BI__builtin_readcyclecounter: {
3903 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3904 return RValue::get(Builder.CreateCall(F));
3905 }
3906 case Builtin::BI__builtin_readsteadycounter: {
3907 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3908 return RValue::get(Builder.CreateCall(F));
3909 }
3910 case Builtin::BI__builtin___clear_cache: {
3911 Value *Begin = EmitScalarExpr(E->getArg(0));
3912 Value *End = EmitScalarExpr(E->getArg(1));
3913 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3914 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3915 }
3916 case Builtin::BI__builtin_trap:
3917 EmitTrapCall(Intrinsic::trap);
3918 return RValue::get(nullptr);
3919 case Builtin::BI__builtin_verbose_trap: {
3920 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3921 if (getDebugInfo()) {
3922 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3923 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3924 *E->getArg(1)->tryEvaluateString(getContext()));
3925 }
3926 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3927 // Currently no attempt is made to prevent traps from being merged.
3928 EmitTrapCall(Intrinsic::trap);
3929 return RValue::get(nullptr);
3930 }
3931 case Builtin::BI__debugbreak:
3932 EmitTrapCall(Intrinsic::debugtrap);
3933 return RValue::get(nullptr);
3934 case Builtin::BI__builtin_unreachable: {
3936
3937 // We do need to preserve an insertion point.
3938 EmitBlock(createBasicBlock("unreachable.cont"));
3939
3940 return RValue::get(nullptr);
3941 }
3942
3943 case Builtin::BI__builtin_powi:
3944 case Builtin::BI__builtin_powif:
3945 case Builtin::BI__builtin_powil: {
3946 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3947 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3948
3949 if (Builder.getIsFPConstrained()) {
3950 // FIXME: llvm.powi has 2 mangling types,
3951 // llvm.experimental.constrained.powi has one.
3952 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3953 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3954 Src0->getType());
3955 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3956 }
3957
3958 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3959 { Src0->getType(), Src1->getType() });
3960 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3961 }
3962 case Builtin::BI__builtin_frexpl: {
3963 // Linux PPC will not be adding additional PPCDoubleDouble support.
3964 // WIP to switch default to IEEE long double. Will emit libcall for
3965 // frexpl instead of legalizing this type in the BE.
3966 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3967 break;
3968 [[fallthrough]];
3969 }
3970 case Builtin::BI__builtin_frexp:
3971 case Builtin::BI__builtin_frexpf:
3972 case Builtin::BI__builtin_frexpf128:
3973 case Builtin::BI__builtin_frexpf16:
3974 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3975 case Builtin::BI__builtin_isgreater:
3976 case Builtin::BI__builtin_isgreaterequal:
3977 case Builtin::BI__builtin_isless:
3978 case Builtin::BI__builtin_islessequal:
3979 case Builtin::BI__builtin_islessgreater:
3980 case Builtin::BI__builtin_isunordered: {
3981 // Ordered comparisons: we know the arguments to these are matching scalar
3982 // floating point values.
3983 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3984 Value *LHS = EmitScalarExpr(E->getArg(0));
3985 Value *RHS = EmitScalarExpr(E->getArg(1));
3986
3987 switch (BuiltinID) {
3988 default: llvm_unreachable("Unknown ordered comparison");
3989 case Builtin::BI__builtin_isgreater:
3990 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3991 break;
3992 case Builtin::BI__builtin_isgreaterequal:
3993 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3994 break;
3995 case Builtin::BI__builtin_isless:
3996 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3997 break;
3998 case Builtin::BI__builtin_islessequal:
3999 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
4000 break;
4001 case Builtin::BI__builtin_islessgreater:
4002 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
4003 break;
4004 case Builtin::BI__builtin_isunordered:
4005 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
4006 break;
4007 }
4008 // ZExt bool to int type.
4009 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
4010 }
4011
4012 case Builtin::BI__builtin_isnan: {
4013 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4014 Value *V = EmitScalarExpr(E->getArg(0));
4015 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4016 return RValue::get(Result);
4017 return RValue::get(
4018 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
4019 ConvertType(E->getType())));
4020 }
4021
4022 case Builtin::BI__builtin_issignaling: {
4023 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4024 Value *V = EmitScalarExpr(E->getArg(0));
4025 return RValue::get(
4026 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
4027 ConvertType(E->getType())));
4028 }
4029
4030 case Builtin::BI__builtin_isinf: {
4031 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4032 Value *V = EmitScalarExpr(E->getArg(0));
4033 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4034 return RValue::get(Result);
4035 return RValue::get(
4036 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4037 ConvertType(E->getType())));
4038 }
4039
4040 case Builtin::BIfinite:
4041 case Builtin::BI__finite:
4042 case Builtin::BIfinitef:
4043 case Builtin::BI__finitef:
4044 case Builtin::BIfinitel:
4045 case Builtin::BI__finitel:
4046 case Builtin::BI__builtin_isfinite: {
4047 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4048 Value *V = EmitScalarExpr(E->getArg(0));
4049 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4050 return RValue::get(Result);
4051 return RValue::get(
4052 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4053 ConvertType(E->getType())));
4054 }
4055
4056 case Builtin::BI__builtin_isnormal: {
4057 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4058 Value *V = EmitScalarExpr(E->getArg(0));
4059 return RValue::get(
4060 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4061 ConvertType(E->getType())));
4062 }
4063
4064 case Builtin::BI__builtin_issubnormal: {
4065 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4066 Value *V = EmitScalarExpr(E->getArg(0));
4067 return RValue::get(
4068 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4069 ConvertType(E->getType())));
4070 }
4071
4072 case Builtin::BI__builtin_iszero: {
4073 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4074 Value *V = EmitScalarExpr(E->getArg(0));
4075 return RValue::get(
4076 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4077 ConvertType(E->getType())));
4078 }
4079
4080 case Builtin::BI__builtin_isfpclass: {
4082 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4083 break;
4084 uint64_t Test = Result.Val.getInt().getLimitedValue();
4085 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4086 Value *V = EmitScalarExpr(E->getArg(0));
4087 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4088 ConvertType(E->getType())));
4089 }
4090
4091 case Builtin::BI__builtin_nondeterministic_value: {
4092 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4093
4094 Value *Result = PoisonValue::get(Ty);
4095 Result = Builder.CreateFreeze(Result);
4096
4097 return RValue::get(Result);
4098 }
4099
4100 case Builtin::BI__builtin_elementwise_abs: {
4101 Value *Result;
4102 QualType QT = E->getArg(0)->getType();
4103
4104 if (auto *VecTy = QT->getAs<VectorType>())
4105 QT = VecTy->getElementType();
4106 if (QT->isIntegerType())
4107 Result = Builder.CreateBinaryIntrinsic(
4108 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4109 Builder.getFalse(), nullptr, "elt.abs");
4110 else
4111 Result = emitBuiltinWithOneOverloadedType<1>(
4112 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4113
4114 return RValue::get(Result);
4115 }
4116 case Builtin::BI__builtin_elementwise_acos:
4117 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4118 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4119 case Builtin::BI__builtin_elementwise_asin:
4120 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4121 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4122 case Builtin::BI__builtin_elementwise_atan:
4123 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4124 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4125 case Builtin::BI__builtin_elementwise_atan2:
4126 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4127 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4128 case Builtin::BI__builtin_elementwise_ceil:
4129 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4130 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4131 case Builtin::BI__builtin_elementwise_exp:
4132 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4133 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4134 case Builtin::BI__builtin_elementwise_exp2:
4135 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4136 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4137 case Builtin::BI__builtin_elementwise_log:
4138 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4139 *this, E, llvm::Intrinsic::log, "elt.log"));
4140 case Builtin::BI__builtin_elementwise_log2:
4141 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4142 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4143 case Builtin::BI__builtin_elementwise_log10:
4144 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4145 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4146 case Builtin::BI__builtin_elementwise_pow: {
4147 return RValue::get(
4148 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4149 }
4150 case Builtin::BI__builtin_elementwise_bitreverse:
4151 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4152 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4153 case Builtin::BI__builtin_elementwise_cos:
4154 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4155 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4156 case Builtin::BI__builtin_elementwise_cosh:
4157 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4158 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4159 case Builtin::BI__builtin_elementwise_floor:
4160 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4161 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4162 case Builtin::BI__builtin_elementwise_popcount:
4163 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4164 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4165 case Builtin::BI__builtin_elementwise_roundeven:
4166 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4167 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4168 case Builtin::BI__builtin_elementwise_round:
4169 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4170 *this, E, llvm::Intrinsic::round, "elt.round"));
4171 case Builtin::BI__builtin_elementwise_rint:
4172 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4173 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4174 case Builtin::BI__builtin_elementwise_nearbyint:
4175 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4176 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4177 case Builtin::BI__builtin_elementwise_sin:
4178 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4179 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4180 case Builtin::BI__builtin_elementwise_sinh:
4181 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4182 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4183 case Builtin::BI__builtin_elementwise_tan:
4184 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4185 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4186 case Builtin::BI__builtin_elementwise_tanh:
4187 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4188 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4189 case Builtin::BI__builtin_elementwise_trunc:
4190 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4191 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4192 case Builtin::BI__builtin_elementwise_canonicalize:
4193 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4194 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4195 case Builtin::BI__builtin_elementwise_copysign:
4196 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4197 *this, E, llvm::Intrinsic::copysign));
4198 case Builtin::BI__builtin_elementwise_fma:
4199 return RValue::get(
4200 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4201 case Builtin::BI__builtin_elementwise_add_sat:
4202 case Builtin::BI__builtin_elementwise_sub_sat: {
4203 Value *Op0 = EmitScalarExpr(E->getArg(0));
4204 Value *Op1 = EmitScalarExpr(E->getArg(1));
4205 Value *Result;
4206 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4207 QualType Ty = E->getArg(0)->getType();
4208 if (auto *VecTy = Ty->getAs<VectorType>())
4209 Ty = VecTy->getElementType();
4210 bool IsSigned = Ty->isSignedIntegerType();
4211 unsigned Opc;
4212 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4213 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4214 else
4215 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4216 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4217 return RValue::get(Result);
4218 }
4219
4220 case Builtin::BI__builtin_elementwise_max: {
4221 Value *Op0 = EmitScalarExpr(E->getArg(0));
4222 Value *Op1 = EmitScalarExpr(E->getArg(1));
4223 Value *Result;
4224 if (Op0->getType()->isIntOrIntVectorTy()) {
4225 QualType Ty = E->getArg(0)->getType();
4226 if (auto *VecTy = Ty->getAs<VectorType>())
4227 Ty = VecTy->getElementType();
4228 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4229 ? llvm::Intrinsic::smax
4230 : llvm::Intrinsic::umax,
4231 Op0, Op1, nullptr, "elt.max");
4232 } else
4233 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4234 return RValue::get(Result);
4235 }
4236 case Builtin::BI__builtin_elementwise_min: {
4237 Value *Op0 = EmitScalarExpr(E->getArg(0));
4238 Value *Op1 = EmitScalarExpr(E->getArg(1));
4239 Value *Result;
4240 if (Op0->getType()->isIntOrIntVectorTy()) {
4241 QualType Ty = E->getArg(0)->getType();
4242 if (auto *VecTy = Ty->getAs<VectorType>())
4243 Ty = VecTy->getElementType();
4244 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4245 ? llvm::Intrinsic::smin
4246 : llvm::Intrinsic::umin,
4247 Op0, Op1, nullptr, "elt.min");
4248 } else
4249 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4250 return RValue::get(Result);
4251 }
4252
4253 case Builtin::BI__builtin_elementwise_maximum: {
4254 Value *Op0 = EmitScalarExpr(E->getArg(0));
4255 Value *Op1 = EmitScalarExpr(E->getArg(1));
4256 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4257 Op1, nullptr, "elt.maximum");
4258 return RValue::get(Result);
4259 }
4260
4261 case Builtin::BI__builtin_elementwise_minimum: {
4262 Value *Op0 = EmitScalarExpr(E->getArg(0));
4263 Value *Op1 = EmitScalarExpr(E->getArg(1));
4264 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4265 Op1, nullptr, "elt.minimum");
4266 return RValue::get(Result);
4267 }
4268
// Lowers __builtin_reduce_max to one of the llvm.vector.reduce.{smax,umax,fmax}
// intrinsics, selected from the type of the first argument.
4269 case Builtin::BI__builtin_reduce_max: {
// Chooses the reduction intrinsic: signed integer -> smax, unsigned integer ->
// umax; any other type is asserted to be floating point and maps to fmax.
4270 auto GetIntrinsicID = [this](QualType QT) {
4271 if (auto *VecTy = QT->getAs<VectorType>())
4272 QT = VecTy->getElementType();
// NOTE(review): no statement belonging to this `else if` is visible in this
// view (the embedded numbering skips 4274) — confirm against the upstream file.
4273 else if (QT->isSizelessVectorType())
4275
4276 if (QT->isSignedIntegerType())
4277 return llvm::Intrinsic::vector_reduce_smax;
4278 if (QT->isUnsignedIntegerType())
4279 return llvm::Intrinsic::vector_reduce_umax;
4280 assert(QT->isFloatingType() && "must have a float here");
4281 return llvm::Intrinsic::vector_reduce_fmax;
4282 };
// NOTE(review): the value is named "rdx.min" although this is the max
// reduction; this looks like a copy/paste of the min case's name. It only
// affects the IR value name — confirm before renaming to "rdx.max".
4283 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4284 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4285 }
4286
// Lowers __builtin_reduce_min to one of the llvm.vector.reduce.{smin,umin,fmin}
// intrinsics, selected from the type of the first argument.
4287 case Builtin::BI__builtin_reduce_min: {
// Chooses the reduction intrinsic: signed integer -> smin, unsigned integer ->
// umin; any other type is asserted to be floating point and maps to fmin.
4288 auto GetIntrinsicID = [this](QualType QT) {
4289 if (auto *VecTy = QT->getAs<VectorType>())
4290 QT = VecTy->getElementType();
// NOTE(review): no statement belonging to this `else if` is visible in this
// view (the embedded numbering skips 4292) — confirm against the upstream file.
4291 else if (QT->isSizelessVectorType())
4293
4294 if (QT->isSignedIntegerType())
4295 return llvm::Intrinsic::vector_reduce_smin;
4296 if (QT->isUnsignedIntegerType())
4297 return llvm::Intrinsic::vector_reduce_umin;
4298 assert(QT->isFloatingType() && "must have a float here");
4299 return llvm::Intrinsic::vector_reduce_fmin;
4300 };
4301
// The reduced value is named "rdx.min" in the IR.
4302 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4303 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4304 }
4305
4306 case Builtin::BI__builtin_reduce_add:
4307 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4308 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4309 case Builtin::BI__builtin_reduce_mul:
4310 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4311 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4312 case Builtin::BI__builtin_reduce_xor:
4313 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4314 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4315 case Builtin::BI__builtin_reduce_or:
4316 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4317 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4318 case Builtin::BI__builtin_reduce_and:
4319 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4320 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4321 case Builtin::BI__builtin_reduce_maximum:
4322 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4323 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4324 case Builtin::BI__builtin_reduce_minimum:
4325 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4326 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4327
4328 case Builtin::BI__builtin_matrix_transpose: {
4329 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4330 Value *MatValue = EmitScalarExpr(E->getArg(0));
4331 MatrixBuilder MB(Builder);
4332 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4333 MatrixTy->getNumColumns());
4334 return RValue::get(Result);
4335 }
4336
// Lowers __builtin_matrix_column_major_load: loads a matrix of the call's
// ConstantMatrixType result type from the pointer in argument 0, using the
// stride taken from argument 3.
4337 case Builtin::BI__builtin_matrix_column_major_load: {
4338 MatrixBuilder MB(Builder);
4339 // Emit everything that isn't dependent on the first parameter type
4340 Value *Stride = EmitScalarExpr(E->getArg(3));
4341 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4342 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4343 assert(PtrTy && "arg0 must be of pointer type");
// The load is volatile when the pointee type is volatile-qualified.
4344 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4345
4346 Address Src = EmitPointerWithAlignment(E->getArg(0));
// NOTE(review): the next two lines are the tail of a call whose first line is
// not visible in this view (embedded line 4347 is absent) — confirm upstream.
4348 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4349 0);
// The load uses Src's element type, raw pointer and alignment; the result is
// named "matrix".
4350 Value *Result = MB.CreateColumnMajorLoad(
4351 Src.getElementType(), Src.emitRawPointer(*this),
4352 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4353 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4354 return RValue::get(Result);
4355 }
4356
// Lowers __builtin_matrix_column_major_store: stores the matrix in argument 0
// through the pointer in argument 1, using the stride from argument 2.
4357 case Builtin::BI__builtin_matrix_column_major_store: {
4358 MatrixBuilder MB(Builder);
4359 Value *Matrix = EmitScalarExpr(E->getArg(0));
4360 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4361 Value *Stride = EmitScalarExpr(E->getArg(2));
4362
4363 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4364 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4365 assert(PtrTy && "arg1 must be of pointer type");
// The store is volatile when the pointee type is volatile-qualified.
4366 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4367
// NOTE(review): the next two lines are the tail of a call whose first line is
// not visible in this view (embedded line 4368 is absent) — confirm upstream.
4369 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4370 0);
// The store uses Dst's raw pointer and alignment; the value produced by
// CreateColumnMajorStore is what the builtin returns.
4371 Value *Result = MB.CreateColumnMajorStore(
4372 Matrix, Dst.emitRawPointer(*this),
4373 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4374 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4375 return RValue::get(Result);
4376 }
4377
4378 case Builtin::BI__builtin_isinf_sign: {
4379 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4380 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4381 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4382 Value *Arg = EmitScalarExpr(E->getArg(0));
4383 Value *AbsArg = EmitFAbs(*this, Arg);
4384 Value *IsInf = Builder.CreateFCmpOEQ(
4385 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4386 Value *IsNeg = EmitSignBit(*this, Arg);
4387
4388 llvm::Type *IntTy = ConvertType(E->getType());
4389 Value *Zero = Constant::getNullValue(IntTy);
4390 Value *One = ConstantInt::get(IntTy, 1);
4391 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4392 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4393 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4394 return RValue::get(Result);
4395 }
4396
4397 case Builtin::BI__builtin_flt_rounds: {
4398 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4399
4400 llvm::Type *ResultType = ConvertType(E->getType());
4401 Value *Result = Builder.CreateCall(F);
4402 if (Result->getType() != ResultType)
4403 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4404 "cast");
4405 return RValue::get(Result);
4406 }
4407
4408 case Builtin::BI__builtin_set_flt_rounds: {
4409 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4410
4411 Value *V = EmitScalarExpr(E->getArg(0));
4412 Builder.CreateCall(F, V);
4413 return RValue::get(nullptr);
4414 }
4415
// Lowers __builtin_fpclassify as a chain of basic blocks feeding one PHI.
// Argument 5 is the value being classified; arguments 0..4 supply the result
// for NaN, infinity, normal, subnormal and zero respectively (see the
// EmitScalarExpr calls below).
4416 case Builtin::BI__builtin_fpclassify: {
4417 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4418 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4419 Value *V = EmitScalarExpr(E->getArg(5));
4420 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4421
4422 // Create Result
// The PHI lives in the join block and reserves four incoming edges, one per
// block that branches to End (Begin, NotZero, NotNan, NotInf).
4423 BasicBlock *Begin = Builder.GetInsertBlock();
4424 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4425 Builder.SetInsertPoint(End);
4426 PHINode *Result =
4427 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4428 "fpclassify_result");
4429
4430 // if (V==0) return FP_ZERO
4431 Builder.SetInsertPoint(Begin);
4432 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4433 "iszero");
4434 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4435 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4436 Builder.CreateCondBr(IsZero, End, NotZero);
4437 Result->addIncoming(ZeroLiteral, Begin);
4438
4439 // if (V != V) return FP_NAN
// An unordered compare of V with itself is true only for NaN.
4440 Builder.SetInsertPoint(NotZero);
4441 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4442 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4443 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4444 Builder.CreateCondBr(IsNan, End, NotNan);
4445 Result->addIncoming(NanLiteral, NotZero);
4446
4447 // if (fabs(V) == infinity) return FP_INFINITY
4448 Builder.SetInsertPoint(NotNan);
4449 Value *VAbs = EmitFAbs(*this, V);
4450 Value *IsInf =
4451 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4452 "isinf");
4453 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4454 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4455 Builder.CreateCondBr(IsInf, End, NotInf);
4456 Result->addIncoming(InfLiteral, NotNan);
4457
4458 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
// The threshold is the smallest normalized value of argument 5's float
// semantics; the normal/subnormal choice is a select, not a branch.
4459 Builder.SetInsertPoint(NotInf);
4460 APFloat Smallest = APFloat::getSmallestNormalized(
4461 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4462 Value *IsNormal =
4463 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4464 "isnormal");
4465 Value *NormalResult =
4466 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4467 EmitScalarExpr(E->getArg(3)));
4468 Builder.CreateBr(End);
4469 Result->addIncoming(NormalResult, NotInf);
4470
4471 // return Result
// Leave the builder positioned in the join block for the code that follows.
4472 Builder.SetInsertPoint(End);
4473 return RValue::get(Result);
4474 }
4475
4476 // An alloca will always return a pointer to the alloca (stack) address
4477 // space. This address space need not be the same as the AST / Language
4478 // default (e.g. in C / C++ auto vars are in the generic address space). At
4479 // the AST level this is handled within CreateTempAlloca et al., but for the
4480 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4481 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
// All four spellings share one lowering: an i8 alloca of the size in argument 0.
4482 case Builtin::BIalloca:
4483 case Builtin::BI_alloca:
4484 case Builtin::BI__builtin_alloca_uninitialized:
4485 case Builtin::BI__builtin_alloca: {
4486 Value *Size = EmitScalarExpr(E->getArg(0));
4487 const TargetInfo &TI = getContext().getTargetInfo();
4488 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
// NOTE(review): the middle of this expression is not visible in this view
// (embedded line 4491 is absent) — confirm against the upstream file.
4489 const Align SuitableAlignmentInBytes =
4490 CGM.getContext()
4492 .getAsAlign();
4493 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4494 AI->setAlignment(SuitableAlignmentInBytes);
// Only the *_uninitialized variant skips initializeAlloca.
4495 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4496 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
// NOTE(review): the declarations of AAS and EAS are not visible in this view
// (embedded lines 4497-4498 are absent). When they differ, the result goes
// through the target hook's address-space cast to the builtin's result type.
4499 if (AAS != EAS) {
4500 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4501 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4502 EAS, Ty));
4503 }
4504 return RValue::get(AI);
4505 }
4506
// Like the plain alloca lowering, but the alignment comes from argument 1,
// which is given in bits and must fold to a ConstantInt.
4507 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4508 case Builtin::BI__builtin_alloca_with_align: {
4509 Value *Size = EmitScalarExpr(E->getArg(0));
4510 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
// cast<> asserts that the emitted alignment is a constant integer.
4511 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4512 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
// Convert the bit count to a byte alignment.
4513 const Align AlignmentInBytes =
4514 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4515 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4516 AI->setAlignment(AlignmentInBytes);
// Only the *_uninitialized variant skips initializeAlloca.
4517 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4518 initializeAlloca(*this, AI, Size, AlignmentInBytes);
// NOTE(review): the declarations of AAS and EAS are not visible in this view
// (embedded lines 4519-4520 are absent). When they differ, the result goes
// through the target hook's address-space cast to the builtin's result type.
4521 if (AAS != EAS) {
4522 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4523 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4524 EAS, Ty));
4525 }
4526 return RValue::get(AI);
4527 }
4528
4529 case Builtin::BIbzero:
4530 case Builtin::BI__builtin_bzero: {
4531 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4532 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4533 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4534 E->getArg(0)->getExprLoc(), FD, 0);
4535 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4536 return RValue::get(nullptr);
4537 }
4538
// bcopy(src, dst, n): note the argument order — argument 0 is the source and
// argument 1 the destination. Emitted as a non-volatile memmove; the builtin
// yields no value.
4539 case Builtin::BIbcopy:
4540 case Builtin::BI__builtin_bcopy: {
4541 Address Src = EmitPointerWithAlignment(E->getArg(0));
4542 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4543 Value *SizeVal = EmitScalarExpr(E->getArg(2));
// NOTE(review): the two argument tails below belong to calls whose first lines
// are not visible in this view (embedded lines 4544 and 4547 are absent) —
// confirm against the upstream file.
4545 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4546 0);
4548 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4549 0);
4550 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4551 return RValue::get(nullptr);
4552 }
4553
// memcpy / mempcpy: both emit the same non-volatile memcpy after checking the
// destination as a store target and the source as a load target. They differ
// only in the returned pointer.
4554 case Builtin::BImemcpy:
4555 case Builtin::BI__builtin_memcpy:
4556 case Builtin::BImempcpy:
4557 case Builtin::BI__builtin_mempcpy: {
4558 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4559 Address Src = EmitPointerWithAlignment(E->getArg(1));
4560 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4561 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4562 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4563 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
// NOTE(review): the head of the mempcpy return expression is not visible in
// this view (embedded line 4566 is absent); the visible tail combines Dest's
// element type, Dest's raw pointer and SizeVal — confirm upstream.
4564 if (BuiltinID == Builtin::BImempcpy ||
4565 BuiltinID == Builtin::BI__builtin_mempcpy)
4567 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4568 else
// Plain memcpy returns the destination.
4569 return RValue::get(Dest, *this);
4570 }
4571
4572 case Builtin::BI__builtin_memcpy_inline: {
4573 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4574 Address Src = EmitPointerWithAlignment(E->getArg(1));
4575 uint64_t Size =
4576 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4577 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4578 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4579 Builder.CreateMemCpyInline(Dest, Src, Size);
4580 return RValue::get(nullptr);
4581 }
4582
// __builtin_char_memchr is not lowered here: the builtin ID is rewritten to
// __builtin_memchr and control leaves the switch, so whatever follows the
// switch sees it as memchr.
4583 case Builtin::BI__builtin_char_memchr:
4584 BuiltinID = Builtin::BI__builtin_memchr;
4585 break;
4586
4587 case Builtin::BI__builtin___memcpy_chk: {
4588 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4589 Expr::EvalResult SizeResult, DstSizeResult;
4590 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4591 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4592 break;
4593 llvm::APSInt Size = SizeResult.Val.getInt();
4594 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4595 if (Size.ugt(DstSize))
4596 break;
4597 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4598 Address Src = EmitPointerWithAlignment(E->getArg(1));
4599 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4600 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4601 return RValue::get(Dest, *this);
4602 }
4603
// __builtin_objc_memmove_collectable(dst, src, n): returns the destination
// address.
4604 case Builtin::BI__builtin_objc_memmove_collectable: {
4605 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4606 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4607 Value *SizeVal = EmitScalarExpr(E->getArg(2));
// NOTE(review): the callee that receives these three operands is not visible
// in this view (embedded line 4608 is absent) — confirm against upstream.
4609 DestAddr, SrcAddr, SizeVal);
4610 return RValue::get(DestAddr, *this);
4611 }
4612
4613 case Builtin::BI__builtin___memmove_chk: {
4614 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4615 Expr::EvalResult SizeResult, DstSizeResult;
4616 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4617 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4618 break;
4619 llvm::APSInt Size = SizeResult.Val.getInt();
4620 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4621 if (Size.ugt(DstSize))
4622 break;
4623 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4624 Address Src = EmitPointerWithAlignment(E->getArg(1));
4625 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4626 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4627 return RValue::get(Dest, *this);
4628 }
4629
4630 case Builtin::BImemmove:
4631 case Builtin::BI__builtin_memmove: {
4632 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4633 Address Src = EmitPointerWithAlignment(E->getArg(1));
4634 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4635 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4636 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4637 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4638 return RValue::get(Dest, *this);
4639 }
4640 case Builtin::BImemset:
4641 case Builtin::BI__builtin_memset: {
4642 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4643 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4644 Builder.getInt8Ty());
4645 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4646 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4647 E->getArg(0)->getExprLoc(), FD, 0);
4648 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4649 return RValue::get(Dest, *this);
4650 }
// __builtin_memset_inline(dst, c, n): the fill value is truncated to i8 and the
// size (argument 2) is evaluated as a known constant at compile time. The
// builtin yields no value.
4651 case Builtin::BI__builtin_memset_inline: {
4652 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4653 Value *ByteVal =
4654 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4655 uint64_t Size =
4656 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
// NOTE(review): the next two lines are the tail of a call whose first line is
// not visible in this view (embedded line 4657 is absent) — confirm upstream.
4658 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4659 0);
4660 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4661 return RValue::get(nullptr);
4662 }
4663 case Builtin::BI__builtin___memset_chk: {
4664 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4665 Expr::EvalResult SizeResult, DstSizeResult;
4666 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4667 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4668 break;
4669 llvm::APSInt Size = SizeResult.Val.getInt();
4670 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4671 if (Size.ugt(DstSize))
4672 break;
4673 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4674 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4675 Builder.getInt8Ty());
4676 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4677 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4678 return RValue::get(Dest, *this);
4679 }
// Inline expansion of wmemchr(str, chr, size), used only when the target's OS
// is MSVCRT; on every other target control leaves the switch via break.
// Emitted shape: entry -> wmemchr.eq (compare one element) -> wmemchr.next
// (advance, decrement) -> back to wmemchr.eq, with wmemchr.exit as the join.
4680 case Builtin::BI__builtin_wmemchr: {
4681 // The MSVC runtime library does not provide a definition of wmemchr, so we
4682 // need an inline implementation.
4683 if (!getTarget().getTriple().isOSMSVCRT())
4684 break;
4685
4686 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4687 Value *Str = EmitScalarExpr(E->getArg(0));
4688 Value *Chr = EmitScalarExpr(E->getArg(1));
4689 Value *Size = EmitScalarExpr(E->getArg(2));
4690
4691 BasicBlock *Entry = Builder.GetInsertBlock();
4692 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4693 BasicBlock *Next = createBasicBlock("wmemchr.next");
4694 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
// A zero size skips the loop entirely.
4695 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4696 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4697
// Loop header: PHIs carry the current pointer and the remaining count.
4698 EmitBlock(CmpEq);
4699 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4700 StrPhi->addIncoming(Str, Entry);
4701 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4702 SizePhi->addIncoming(Size, Entry);
// NOTE(review): the initializer of WCharAlign is not visible in this view
// (embedded line 4704 is absent) — confirm against the upstream file.
4703 CharUnits WCharAlign =
4705 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
// FoundChr is the current pointer (GEP by 0); it is the result on a match.
4706 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4707 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4708 Builder.CreateCondBr(StrEqChr, Exit, Next);
4709
// Advance by one element and stop once the remaining count reaches zero.
4710 EmitBlock(Next);
4711 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4712 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4713 Value *NextSizeEq0 =
4714 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4715 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4716 StrPhi->addIncoming(NextStr, Next);
4717 SizePhi->addIncoming(NextSize, Next);
4718
// Result: null when coming from Entry or Next, the matching pointer when
// coming from CmpEq.
4719 EmitBlock(Exit);
4720 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4721 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4722 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4723 Ret->addIncoming(FoundChr, CmpEq);
4724 return RValue::get(Ret);
4725 }
// Inline expansion of wmemcmp(dst, src, size), used only when the target's OS
// is MSVCRT; on every other target control leaves the switch via break.
// Emitted shape: entry -> wmemcmp.gt (dst > src?) -> wmemcmp.lt (dst < src?)
// -> wmemcmp.next (advance, decrement) -> back to wmemcmp.gt, with
// wmemcmp.exit as the join.
4726 case Builtin::BI__builtin_wmemcmp: {
4727 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4728 // need an inline implementation.
4729 if (!getTarget().getTriple().isOSMSVCRT())
4730 break;
4731
4732 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4733
4734 Value *Dst = EmitScalarExpr(E->getArg(0));
4735 Value *Src = EmitScalarExpr(E->getArg(1));
4736 Value *Size = EmitScalarExpr(E->getArg(2));
4737
4738 BasicBlock *Entry = Builder.GetInsertBlock();
4739 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4740 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4741 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4742 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
// A zero size skips the loop entirely.
4743 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4744 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4745
// Loop header: PHIs carry both current pointers and the remaining count.
4746 EmitBlock(CmpGT);
4747 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4748 DstPhi->addIncoming(Dst, Entry);
4749 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4750 SrcPhi->addIncoming(Src, Entry);
4751 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4752 SizePhi->addIncoming(Size, Entry);
// NOTE(review): the initializer of WCharAlign is not visible in this view
// (embedded line 4754 is absent) — confirm against the upstream file.
4753 CharUnits WCharAlign =
4755 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4756 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
// Elements are compared as unsigned integers.
4757 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4758 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4759
4760 EmitBlock(CmpLT);
4761 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4762 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4763
// Elements were equal: advance both pointers and stop once the remaining
// count reaches zero.
4764 EmitBlock(Next);
4765 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4766 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4767 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4768 Value *NextSizeEq0 =
4769 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4770 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4771 DstPhi->addIncoming(NextDst, Next);
4772 SrcPhi->addIncoming(NextSrc, Next);
4773 SizePhi->addIncoming(NextSize, Next);
4774
// Result: 0 from Entry or Next, 1 from the greater-than block, -1 from the
// less-than block.
4775 EmitBlock(Exit);
4776 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4777 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4778 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4779 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4780 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4781 return RValue::get(Ret);
4782 }
4783 case Builtin::BI__builtin_dwarf_cfa: {
4784 // The offset in bytes from the first argument to the CFA.
4785 //
4786 // Why on earth is this in the frontend? Is there any reason at
4787 // all that the backend can't reasonably determine this while
4788 // lowering llvm.eh.dwarf.cfa()?
4789 //
4790 // TODO: If there's a satisfactory reason, add a target hook for
4791 // this instead of hard-coding 0, which is correct for most targets.
4792 int32_t Offset = 0;
4793
4794 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4795 return RValue::get(Builder.CreateCall(F,
4796 llvm::ConstantInt::get(Int32Ty, Offset)));
4797 }
4798 case Builtin::BI__builtin_return_address: {
4799 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4800 getContext().UnsignedIntTy);
4801 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4802 return RValue::get(Builder.CreateCall(F, Depth));
4803 }
4804 case Builtin::BI_ReturnAddress: {
4805 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4806 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4807 }
4808 case Builtin::BI__builtin_frame_address: {
4809 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4810 getContext().UnsignedIntTy);
4811 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4812 return RValue::get(Builder.CreateCall(F, Depth));
4813 }
// __builtin_extract_return_addr: emits the address argument and returns a
// value derived from it.
// NOTE(review): the statement that defines Result is not visible in this view
// (embedded line 4816 is absent) — confirm against the upstream file.
4814 case Builtin::BI__builtin_extract_return_addr: {
4815 Value *Address = EmitScalarExpr(E->getArg(0));
4817 return RValue::get(Result);
4818 }
// __builtin_frob_return_addr: same shape as the case above.
// NOTE(review): the statement that defines Result is not visible in this view
// (embedded line 4821 is absent) — confirm against the upstream file.
4819 case Builtin::BI__builtin_frob_return_addr: {
4820 Value *Address = EmitScalarExpr(E->getArg(0));
4822 return RValue::get(Result);
4823 }
// __builtin_dwarf_sp_column: yields a column number as a signed integer
// constant of the builtin's (integer) result type.
4824 case Builtin::BI__builtin_dwarf_sp_column: {
4825 llvm::IntegerType *Ty
4826 = cast<llvm::IntegerType>(ConvertType(E->getType()));
// NOTE(review): the statement that defines Column is not visible in this view
// (embedded line 4827 is absent) — confirm against the upstream file.
// A Column of -1 is reported as unsupported and the result becomes undef.
4828 if (Column == -1) {
4829 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4830 return RValue::get(llvm::UndefValue::get(Ty));
4831 }
4832 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4833 }
4834 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4835 Value *Address = EmitScalarExpr(E->getArg(0));
4836 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4837 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4838 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4839 }
4840 case Builtin::BI__builtin_eh_return: {
4841 Value *Int = EmitScalarExpr(E->getArg(0));
4842 Value *Ptr = EmitScalarExpr(E->getArg(1));
4843
4844 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4845 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4846 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4847 Function *F =
4848 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4849 : Intrinsic::eh_return_i64);
4850 Builder.CreateCall(F, {Int, Ptr});
4851 Builder.CreateUnreachable();
4852
4853 // We do need to preserve an insertion point.
4854 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4855
4856 return RValue::get(nullptr);
4857 }
4858 case Builtin::BI__builtin_unwind_init: {
4859 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4860 Builder.CreateCall(F);
4861 return RValue::get(nullptr);
4862 }
4863 case Builtin::BI__builtin_extend_pointer: {
4864 // Extends a pointer to the size of an _Unwind_Word, which is
4865 // uint64_t on all platforms. Generally this gets poked into a
4866 // register and eventually used as an address, so if the
4867 // addressing registers are wider than pointers and the platform
4868 // doesn't implicitly ignore high-order bits when doing
4869 // addressing, we need to make sure we zext / sext based on
4870 // the platform's expectations.
4871 //
4872 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4873
4874 // Cast the pointer to intptr_t.
4875 Value *Ptr = EmitScalarExpr(E->getArg(0));
4876 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4877
4878 // If that's 64 bits, we're done.
4879 if (IntPtrTy->getBitWidth() == 64)
4880 return RValue::get(Result);
4881
4882 // Otherwise, ask the codegen data what to do.
4883 if (getTargetHooks().extendPointerWithSExt())
4884 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4885 else
4886 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4887 }
4888 case Builtin::BI__builtin_setjmp: {
4889 // Buffer is a void**.
4890 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4891
4892 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4893 // On this target, the back end fills in the context buffer completely.
4894 // It doesn't really matter if the frontend stores to the buffer before
4895 // calling setjmp, the back-end is going to overwrite them anyway.
4896 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4897 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4898 }
4899
4900 // Store the frame pointer to the setjmp buffer.
4901 Value *FrameAddr = Builder.CreateCall(
4902 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4903 ConstantInt::get(Int32Ty, 0));
4904 Builder.CreateStore(FrameAddr, Buf);
4905
4906 // Store the stack pointer to the setjmp buffer.
4907 Value *StackAddr = Builder.CreateStackSave();
4908 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4909
4910 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4911 Builder.CreateStore(StackAddr, StackSaveSlot);
4912
4913 // Call LLVM's EH setjmp, which is lightweight.
4914 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4915 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4916 }
4917 case Builtin::BI__builtin_longjmp: {
4918 Value *Buf = EmitScalarExpr(E->getArg(0));
4919
4920 // Call LLVM's EH longjmp, which is lightweight.
4921 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4922
4923 // longjmp doesn't return; mark this as unreachable.
4924 Builder.CreateUnreachable();
4925
4926 // We do need to preserve an insertion point.
4927 EmitBlock(createBasicBlock("longjmp.cont"));
4928
4929 return RValue::get(nullptr);
4930 }
// __builtin_launder(ptr): emits the pointer argument and returns Ptr. Extra
// handling applies only when TypeRequiresBuiltinLaunder says the pointee type
// needs it.
4931 case Builtin::BI__builtin_launder: {
4932 const Expr *Arg = E->getArg(0);
4933 QualType ArgTy = Arg->getType()->getPointeeType();
4934 Value *Ptr = EmitScalarExpr(Arg);
// NOTE(review): the statement guarded by this `if` is not visible in this view
// (embedded line 4936 is absent) — confirm against the upstream file.
4935 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4937
4938 return RValue::get(Ptr);
4939 }
// The unsuffixed (generic) __sync_* builtin IDs are never expected to reach
// code generation; hitting any of them is treated as an internal error.
4940 case Builtin::BI__sync_fetch_and_add:
4941 case Builtin::BI__sync_fetch_and_sub:
4942 case Builtin::BI__sync_fetch_and_or:
4943 case Builtin::BI__sync_fetch_and_and:
4944 case Builtin::BI__sync_fetch_and_xor:
4945 case Builtin::BI__sync_fetch_and_nand:
4946 case Builtin::BI__sync_add_and_fetch:
4947 case Builtin::BI__sync_sub_and_fetch:
4948 case Builtin::BI__sync_and_and_fetch:
4949 case Builtin::BI__sync_or_and_fetch:
4950 case Builtin::BI__sync_xor_and_fetch:
4951 case Builtin::BI__sync_nand_and_fetch:
4952 case Builtin::BI__sync_val_compare_and_swap:
4953 case Builtin::BI__sync_bool_compare_and_swap:
4954 case Builtin::BI__sync_lock_test_and_set:
4955 case Builtin::BI__sync_lock_release:
4956 case Builtin::BI__sync_swap:
// Per the message below, these are presumably rewritten to the sized
// variants during semantic analysis.
4957 llvm_unreachable("Shouldn't make it through sema");
4958 case Builtin::BI__sync_fetch_and_add_1:
4959 case Builtin::BI__sync_fetch_and_add_2:
4960 case Builtin::BI__sync_fetch_and_add_4:
4961 case Builtin::BI__sync_fetch_and_add_8:
4962 case Builtin::BI__sync_fetch_and_add_16:
4963 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4964 case Builtin::BI__sync_fetch_and_sub_1:
4965 case Builtin::BI__sync_fetch_and_sub_2:
4966 case Builtin::BI__sync_fetch_and_sub_4:
4967 case Builtin::BI__sync_fetch_and_sub_8:
4968 case Builtin::BI__sync_fetch_and_sub_16:
4969 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4970 case Builtin::BI__sync_fetch_and_or_1:
4971 case Builtin::BI__sync_fetch_and_or_2:
4972 case Builtin::BI__sync_fetch_and_or_4:
4973 case Builtin::BI__sync_fetch_and_or_8:
4974 case Builtin::BI__sync_fetch_and_or_16:
4975 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4976 case Builtin::BI__sync_fetch_and_and_1:
4977 case Builtin::BI__sync_fetch_and_and_2:
4978 case Builtin::BI__sync_fetch_and_and_4:
4979 case Builtin::BI__sync_fetch_and_and_8:
4980 case Builtin::BI__sync_fetch_and_and_16:
4981 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4982 case Builtin::BI__sync_fetch_and_xor_1:
4983 case Builtin::BI__sync_fetch_and_xor_2:
4984 case Builtin::BI__sync_fetch_and_xor_4:
4985 case Builtin::BI__sync_fetch_and_xor_8:
4986 case Builtin::BI__sync_fetch_and_xor_16:
4987 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4988 case Builtin::BI__sync_fetch_and_nand_1:
4989 case Builtin::BI__sync_fetch_and_nand_2:
4990 case Builtin::BI__sync_fetch_and_nand_4:
4991 case Builtin::BI__sync_fetch_and_nand_8:
4992 case Builtin::BI__sync_fetch_and_nand_16:
4993 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4994
4995 // Clang extensions: not overloaded yet.
4996 case Builtin::BI__sync_fetch_and_min:
4997 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4998 case Builtin::BI__sync_fetch_and_max:
4999 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
5000 case Builtin::BI__sync_fetch_and_umin:
5001 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
5002 case Builtin::BI__sync_fetch_and_umax:
5003 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
5004
5005 case Builtin::BI__sync_add_and_fetch_1:
5006 case Builtin::BI__sync_add_and_fetch_2:
5007 case Builtin::BI__sync_add_and_fetch_4:
5008 case Builtin::BI__sync_add_and_fetch_8:
5009 case Builtin::BI__sync_add_and_fetch_16:
5010 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
5011 llvm::Instruction::Add);
5012 case Builtin::BI__sync_sub_and_fetch_1:
5013 case Builtin::BI__sync_sub_and_fetch_2:
5014 case Builtin::BI__sync_sub_and_fetch_4:
5015 case Builtin::BI__sync_sub_and_fetch_8:
5016 case Builtin::BI__sync_sub_and_fetch_16:
5017 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
5018 llvm::Instruction::Sub);
5019 case Builtin::BI__sync_and_and_fetch_1:
5020 case Builtin::BI__sync_and_and_fetch_2:
5021 case Builtin::BI__sync_and_and_fetch_4:
5022 case Builtin::BI__sync_and_and_fetch_8:
5023 case Builtin::BI__sync_and_and_fetch_16:
5024 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
5025 llvm::Instruction::And);
5026 case Builtin::BI__sync_or_and_fetch_1:
5027 case Builtin::BI__sync_or_and_fetch_2:
5028 case Builtin::BI__sync_or_and_fetch_4:
5029 case Builtin::BI__sync_or_and_fetch_8:
5030 case Builtin::BI__sync_or_and_fetch_16:
5031 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5032 llvm::Instruction::Or);
5033 case Builtin::BI__sync_xor_and_fetch_1:
5034 case Builtin::BI__sync_xor_and_fetch_2:
5035 case Builtin::BI__sync_xor_and_fetch_4:
5036 case Builtin::BI__sync_xor_and_fetch_8:
5037 case Builtin::BI__sync_xor_and_fetch_16:
5038 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5039 llvm::Instruction::Xor);
5040 case Builtin::BI__sync_nand_and_fetch_1:
5041 case Builtin::BI__sync_nand_and_fetch_2:
5042 case Builtin::BI__sync_nand_and_fetch_4:
5043 case Builtin::BI__sync_nand_and_fetch_8:
5044 case Builtin::BI__sync_nand_and_fetch_16:
5045 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5046 llvm::Instruction::And, true);
5047
5048 case Builtin::BI__sync_val_compare_and_swap_1:
5049 case Builtin::BI__sync_val_compare_and_swap_2:
5050 case Builtin::BI__sync_val_compare_and_swap_4:
5051 case Builtin::BI__sync_val_compare_and_swap_8:
5052 case Builtin::BI__sync_val_compare_and_swap_16:
5053 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5054
5055 case Builtin::BI__sync_bool_compare_and_swap_1:
5056 case Builtin::BI__sync_bool_compare_and_swap_2:
5057 case Builtin::BI__sync_bool_compare_and_swap_4:
5058 case Builtin::BI__sync_bool_compare_and_swap_8:
5059 case Builtin::BI__sync_bool_compare_and_swap_16:
5060 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5061
5062 case Builtin::BI__sync_swap_1:
5063 case Builtin::BI__sync_swap_2:
5064 case Builtin::BI__sync_swap_4:
5065 case Builtin::BI__sync_swap_8:
5066 case Builtin::BI__sync_swap_16:
5067 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5068
5069 case Builtin::BI__sync_lock_test_and_set_1:
5070 case Builtin::BI__sync_lock_test_and_set_2:
5071 case Builtin::BI__sync_lock_test_and_set_4:
5072 case Builtin::BI__sync_lock_test_and_set_8:
5073 case Builtin::BI__sync_lock_test_and_set_16:
5074 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5075
5076 case Builtin::BI__sync_lock_release_1:
5077 case Builtin::BI__sync_lock_release_2:
5078 case Builtin::BI__sync_lock_release_4:
5079 case Builtin::BI__sync_lock_release_8:
5080 case Builtin::BI__sync_lock_release_16: {
5081 Address Ptr = CheckAtomicAlignment(*this, E);
5082 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5083
5084 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5085 getContext().getTypeSize(ElTy));
5086 llvm::StoreInst *Store =
5087 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5088 Store->setAtomic(llvm::AtomicOrdering::Release);
5089 return RValue::get(nullptr);
5090 }
5091
5092 case Builtin::BI__sync_synchronize: {
5093 // We assume this is supposed to correspond to a C++0x-style
5094 // sequentially-consistent fence (i.e. this is only usable for
5095 // synchronization, not device I/O or anything like that). This intrinsic
5096 // is really badly designed in the sense that in theory, there isn't
5097 // any way to safely use it... but in practice, it mostly works
5098 // to use it with non-atomic loads and stores to get acquire/release
5099 // semantics.
5100 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5101 return RValue::get(nullptr);
5102 }
5103
5104 case Builtin::BI__builtin_nontemporal_load:
5105 return RValue::get(EmitNontemporalLoad(*this, E));
5106 case Builtin::BI__builtin_nontemporal_store:
5107 return RValue::get(EmitNontemporalStore(*this, E));
5108 case Builtin::BI__c11_atomic_is_lock_free:
5109 case Builtin::BI__atomic_is_lock_free: {
5110 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5111 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5112 // _Atomic(T) is always properly-aligned.
5113 const char *LibCallName = "__atomic_is_lock_free";
5114 CallArgList Args;
5115 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5116 getContext().getSizeType());
5117 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5118 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5120 else
5121 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5123 const CGFunctionInfo &FuncInfo =
5125 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5126 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5127 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5128 ReturnValueSlot(), Args);
5129 }
5130
5131 case Builtin::BI__atomic_thread_fence:
5132 case Builtin::BI__atomic_signal_fence:
5133 case Builtin::BI__c11_atomic_thread_fence:
5134 case Builtin::BI__c11_atomic_signal_fence: {
5135 llvm::SyncScope::ID SSID;
5136 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5137 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5138 SSID = llvm::SyncScope::SingleThread;
5139 else
5140 SSID = llvm::SyncScope::System;
5141 Value *Order = EmitScalarExpr(E->getArg(0));
5142 if (isa<llvm::ConstantInt>(Order)) {
5143 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5144 switch (ord) {
5145 case 0: // memory_order_relaxed
5146 default: // invalid order
5147 break;
5148 case 1: // memory_order_consume
5149 case 2: // memory_order_acquire
5150 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5151 break;
5152 case 3: // memory_order_release
5153 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5154 break;
5155 case 4: // memory_order_acq_rel
5156 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5157 break;
5158 case 5: // memory_order_seq_cst
5159 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5160 break;
5161 }
5162 return RValue::get(nullptr);
5163 }
5164
5165 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5166 AcquireBB = createBasicBlock("acquire", CurFn);
5167 ReleaseBB = createBasicBlock("release", CurFn);
5168 AcqRelBB = createBasicBlock("acqrel", CurFn);
5169 SeqCstBB = createBasicBlock("seqcst", CurFn);
5170 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5171
5172 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5173 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5174
5175 Builder.SetInsertPoint(AcquireBB);
5176 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5177 Builder.CreateBr(ContBB);
5178 SI->addCase(Builder.getInt32(1), AcquireBB);
5179 SI->addCase(Builder.getInt32(2), AcquireBB);
5180
5181 Builder.SetInsertPoint(ReleaseBB);
5182 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5183 Builder.CreateBr(ContBB);
5184 SI->addCase(Builder.getInt32(3), ReleaseBB);
5185
5186 Builder.SetInsertPoint(AcqRelBB);
5187 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5188 Builder.CreateBr(ContBB);
5189 SI->addCase(Builder.getInt32(4), AcqRelBB);
5190
5191 Builder.SetInsertPoint(SeqCstBB);
5192 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5193 Builder.CreateBr(ContBB);
5194 SI->addCase(Builder.getInt32(5), SeqCstBB);
5195
5196 Builder.SetInsertPoint(ContBB);
5197 return RValue::get(nullptr);
5198 }
5199 case Builtin::BI__scoped_atomic_thread_fence: {
5201
5202 Value *Order = EmitScalarExpr(E->getArg(0));
5203 Value *Scope = EmitScalarExpr(E->getArg(1));
5204 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5205 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5206 if (Ord && Scp) {
5207 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5208 ? ScopeModel->map(Scp->getZExtValue())
5209 : ScopeModel->map(ScopeModel->getFallBackValue());
5210 switch (Ord->getZExtValue()) {
5211 case 0: // memory_order_relaxed
5212 default: // invalid order
5213 break;
5214 case 1: // memory_order_consume
5215 case 2: // memory_order_acquire
5216 Builder.CreateFence(
5217 llvm::AtomicOrdering::Acquire,
5218 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5219 llvm::AtomicOrdering::Acquire,
5220 getLLVMContext()));
5221 break;
5222 case 3: // memory_order_release
5223 Builder.CreateFence(
5224 llvm::AtomicOrdering::Release,
5225 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5226 llvm::AtomicOrdering::Release,
5227 getLLVMContext()));
5228 break;
5229 case 4: // memory_order_acq_rel
5230 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5231 getTargetHooks().getLLVMSyncScopeID(
5232 getLangOpts(), SS,
5233 llvm::AtomicOrdering::AcquireRelease,
5234 getLLVMContext()));
5235 break;
5236 case 5: // memory_order_seq_cst
5237 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5238 getTargetHooks().getLLVMSyncScopeID(
5239 getLangOpts(), SS,
5240 llvm::AtomicOrdering::SequentiallyConsistent,
5241 getLLVMContext()));
5242 break;
5243 }
5244 return RValue::get(nullptr);
5245 }
5246
5247 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5248
5250 OrderBBs;
5251 if (Ord) {
5252 switch (Ord->getZExtValue()) {
5253 case 0: // memory_order_relaxed
5254 default: // invalid order
5255 ContBB->eraseFromParent();
5256 return RValue::get(nullptr);
5257 case 1: // memory_order_consume
5258 case 2: // memory_order_acquire
5259 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5260 llvm::AtomicOrdering::Acquire);
5261 break;
5262 case 3: // memory_order_release
5263 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5264 llvm::AtomicOrdering::Release);
5265 break;
5266 case 4: // memory_order_acq_rel
5267 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5268 llvm::AtomicOrdering::AcquireRelease);
5269 break;
5270 case 5: // memory_order_seq_cst
5271 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5272 llvm::AtomicOrdering::SequentiallyConsistent);
5273 break;
5274 }
5275 } else {
5276 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5277 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5278 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5279 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5280
5281 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5282 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5283 SI->addCase(Builder.getInt32(1), AcquireBB);
5284 SI->addCase(Builder.getInt32(2), AcquireBB);
5285 SI->addCase(Builder.getInt32(3), ReleaseBB);
5286 SI->addCase(Builder.getInt32(4), AcqRelBB);
5287 SI->addCase(Builder.getInt32(5), SeqCstBB);
5288
5289 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5290 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5291 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5292 OrderBBs.emplace_back(SeqCstBB,
5293 llvm::AtomicOrdering::SequentiallyConsistent);
5294 }
5295
5296 for (auto &[OrderBB, Ordering] : OrderBBs) {
5297 Builder.SetInsertPoint(OrderBB);
5298 if (Scp) {
5299 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5300 ? ScopeModel->map(Scp->getZExtValue())
5301 : ScopeModel->map(ScopeModel->getFallBackValue());
5302 Builder.CreateFence(Ordering,
5303 getTargetHooks().getLLVMSyncScopeID(
5304 getLangOpts(), SS, Ordering, getLLVMContext()));
5305 Builder.CreateBr(ContBB);
5306 } else {
5307 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5308 for (unsigned Scp : ScopeModel->getRuntimeValues())
5309 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5310
5311 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5312 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5313 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5314 auto *B = BBs[Scp];
5315 SI->addCase(Builder.getInt32(Scp), B);
5316
5317 Builder.SetInsertPoint(B);
5318 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5319 getLangOpts(), ScopeModel->map(Scp),
5320 Ordering, getLLVMContext()));
5321 Builder.CreateBr(ContBB);
5322 }
5323 }
5324 }
5325
5326 Builder.SetInsertPoint(ContBB);
5327 return RValue::get(nullptr);
5328 }
5329
5330 case Builtin::BI__builtin_signbit:
5331 case Builtin::BI__builtin_signbitf:
5332 case Builtin::BI__builtin_signbitl: {
5333 return RValue::get(
5334 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5335 ConvertType(E->getType())));
5336 }
5337 case Builtin::BI__warn_memset_zero_len:
5338 return RValue::getIgnored();
5339 case Builtin::BI__annotation: {
5340 // Re-encode each wide string to UTF8 and make an MDString.
5342 for (const Expr *Arg : E->arguments()) {
5343 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5344 assert(Str->getCharByteWidth() == 2);
5345 StringRef WideBytes = Str->getBytes();
5346 std::string StrUtf8;
5347 if (!convertUTF16ToUTF8String(
5348 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5349 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5350 continue;
5351 }
5352 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5353 }
5354
5355 // Build an MDTuple of MDStrings and emit the intrinsic call.
5356 llvm::Function *F =
5357 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5358 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5359 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5360 return RValue::getIgnored();
5361 }
5362 case Builtin::BI__builtin_annotation: {
5363 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5364 llvm::Function *F =
5365 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5366 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5367
5368 // Get the annotation string, go through casts. Sema requires this to be a
5369 // non-wide string literal, potentially casted, so the cast<> is safe.
5370 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5371 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5372 return RValue::get(
5373 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5374 }
5375 case Builtin::BI__builtin_addcb:
5376 case Builtin::BI__builtin_addcs:
5377 case Builtin::BI__builtin_addc:
5378 case Builtin::BI__builtin_addcl:
5379 case Builtin::BI__builtin_addcll:
5380 case Builtin::BI__builtin_subcb:
5381 case Builtin::BI__builtin_subcs:
5382 case Builtin::BI__builtin_subc:
5383 case Builtin::BI__builtin_subcl:
5384 case Builtin::BI__builtin_subcll: {
5385
5386 // We translate all of these builtins from expressions of the form:
5387 // int x = ..., y = ..., carryin = ..., carryout, result;
5388 // result = __builtin_addc(x, y, carryin, &carryout);
5389 //
5390 // to LLVM IR of the form:
5391 //
5392 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5393 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5394 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5395 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5396 // i32 %carryin)
5397 // %result = extractvalue {i32, i1} %tmp2, 0
5398 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5399 // %tmp3 = or i1 %carry1, %carry2
5400 // %tmp4 = zext i1 %tmp3 to i32
5401 // store i32 %tmp4, i32* %carryout
5402
5403 // Scalarize our inputs.
5404 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5405 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5406 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5407 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5408
5409 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5410 llvm::Intrinsic::ID IntrinsicId;
5411 switch (BuiltinID) {
5412 default: llvm_unreachable("Unknown multiprecision builtin id.");
5413 case Builtin::BI__builtin_addcb:
5414 case Builtin::BI__builtin_addcs:
5415 case Builtin::BI__builtin_addc:
5416 case Builtin::BI__builtin_addcl:
5417 case Builtin::BI__builtin_addcll:
5418 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5419 break;
5420 case Builtin::BI__builtin_subcb:
5421 case Builtin::BI__builtin_subcs:
5422 case Builtin::BI__builtin_subc:
5423 case Builtin::BI__builtin_subcl:
5424 case Builtin::BI__builtin_subcll:
5425 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5426 break;
5427 }
5428
5429 // Construct our resulting LLVM IR expression.
5430 llvm::Value *Carry1;
5431 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5432 X, Y, Carry1);
5433 llvm::Value *Carry2;
5434 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5435 Sum1, Carryin, Carry2);
5436 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5437 X->getType());
5438 Builder.CreateStore(CarryOut, CarryOutPtr);
5439 return RValue::get(Sum2);
5440 }
5441
5442 case Builtin::BI__builtin_add_overflow:
5443 case Builtin::BI__builtin_sub_overflow:
5444 case Builtin::BI__builtin_mul_overflow: {
5445 const clang::Expr *LeftArg = E->getArg(0);
5446 const clang::Expr *RightArg = E->getArg(1);
5447 const clang::Expr *ResultArg = E->getArg(2);
5448
5449 clang::QualType ResultQTy =
5450 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5451
5452 WidthAndSignedness LeftInfo =
5454 WidthAndSignedness RightInfo =
5456 WidthAndSignedness ResultInfo =
5458
5459 // Handle mixed-sign multiplication as a special case, because adding
5460 // runtime or backend support for our generic irgen would be too expensive.
5461 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5462 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5463 RightInfo, ResultArg, ResultQTy,
5464 ResultInfo);
5465
5466 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5467 ResultInfo))
5469 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5470 ResultInfo);
5471
5472 WidthAndSignedness EncompassingInfo =
5473 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5474
5475 llvm::Type *EncompassingLLVMTy =
5476 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5477
5478 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5479
5480 llvm::Intrinsic::ID IntrinsicId;
5481 switch (BuiltinID) {
5482 default:
5483 llvm_unreachable("Unknown overflow builtin id.");
5484 case Builtin::BI__builtin_add_overflow:
5485 IntrinsicId = EncompassingInfo.Signed
5486 ? llvm::Intrinsic::sadd_with_overflow
5487 : llvm::Intrinsic::uadd_with_overflow;
5488 break;
5489 case Builtin::BI__builtin_sub_overflow:
5490 IntrinsicId = EncompassingInfo.Signed
5491 ? llvm::Intrinsic::ssub_with_overflow
5492 : llvm::Intrinsic::usub_with_overflow;
5493 break;
5494 case Builtin::BI__builtin_mul_overflow:
5495 IntrinsicId = EncompassingInfo.Signed
5496 ? llvm::Intrinsic::smul_with_overflow
5497 : llvm::Intrinsic::umul_with_overflow;
5498 break;
5499 }
5500
5501 llvm::Value *Left = EmitScalarExpr(LeftArg);
5502 llvm::Value *Right = EmitScalarExpr(RightArg);
5503 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5504
5505 // Extend each operand to the encompassing type.
5506 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5507 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5508
5509 // Perform the operation on the extended values.
5510 llvm::Value *Overflow, *Result;
5511 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5512
5513 if (EncompassingInfo.Width > ResultInfo.Width) {
5514 // The encompassing type is wider than the result type, so we need to
5515 // truncate it.
5516 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5517
5518 // To see if the truncation caused an overflow, we will extend
5519 // the result and then compare it to the original result.
5520 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5521 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5522 llvm::Value *TruncationOverflow =
5523 Builder.CreateICmpNE(Result, ResultTruncExt);
5524
5525 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5526 Result = ResultTrunc;
5527 }
5528
5529 // Finally, store the result using the pointer.
5530 bool isVolatile =
5531 ResultArg->getType()->getPointeeType().isVolatileQualified();
5532 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5533
5534 return RValue::get(Overflow);
5535 }
5536
5537 case Builtin::BI__builtin_uadd_overflow:
5538 case Builtin::BI__builtin_uaddl_overflow:
5539 case Builtin::BI__builtin_uaddll_overflow:
5540 case Builtin::BI__builtin_usub_overflow:
5541 case Builtin::BI__builtin_usubl_overflow:
5542 case Builtin::BI__builtin_usubll_overflow:
5543 case Builtin::BI__builtin_umul_overflow:
5544 case Builtin::BI__builtin_umull_overflow:
5545 case Builtin::BI__builtin_umulll_overflow:
5546 case Builtin::BI__builtin_sadd_overflow:
5547 case Builtin::BI__builtin_saddl_overflow:
5548 case Builtin::BI__builtin_saddll_overflow:
5549 case Builtin::BI__builtin_ssub_overflow:
5550 case Builtin::BI__builtin_ssubl_overflow:
5551 case Builtin::BI__builtin_ssubll_overflow:
5552 case Builtin::BI__builtin_smul_overflow:
5553 case Builtin::BI__builtin_smull_overflow:
5554 case Builtin::BI__builtin_smulll_overflow: {
5555
5556 // We translate all of these builtins directly to the relevant llvm IR node.
5557
5558 // Scalarize our inputs.
5559 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5560 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5561 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5562
5563 // Decide which of the overflow intrinsics we are lowering to:
5564 llvm::Intrinsic::ID IntrinsicId;
5565 switch (BuiltinID) {
5566 default: llvm_unreachable("Unknown overflow builtin id.");
5567 case Builtin::BI__builtin_uadd_overflow:
5568 case Builtin::BI__builtin_uaddl_overflow:
5569 case Builtin::BI__builtin_uaddll_overflow:
5570 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5571 break;
5572 case Builtin::BI__builtin_usub_overflow:
5573 case Builtin::BI__builtin_usubl_overflow:
5574 case Builtin::BI__builtin_usubll_overflow:
5575 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5576 break;
5577 case Builtin::BI__builtin_umul_overflow:
5578 case Builtin::BI__builtin_umull_overflow:
5579 case Builtin::BI__builtin_umulll_overflow:
5580 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5581 break;
5582 case Builtin::BI__builtin_sadd_overflow:
5583 case Builtin::BI__builtin_saddl_overflow:
5584 case Builtin::BI__builtin_saddll_overflow:
5585 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5586 break;
5587 case Builtin::BI__builtin_ssub_overflow:
5588 case Builtin::BI__builtin_ssubl_overflow:
5589 case Builtin::BI__builtin_ssubll_overflow:
5590 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5591 break;
5592 case Builtin::BI__builtin_smul_overflow:
5593 case Builtin::BI__builtin_smull_overflow:
5594 case Builtin::BI__builtin_smulll_overflow:
5595 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5596 break;
5597 }
5598
5599
5600 llvm::Value *Carry;
5601 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5602 Builder.CreateStore(Sum, SumOutPtr);
5603
5604 return RValue::get(Carry);
5605 }
5606 case Builtin::BIaddressof:
5607 case Builtin::BI__addressof:
5608 case Builtin::BI__builtin_addressof:
5609 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5610 case Builtin::BI__builtin_function_start:
5613 case Builtin::BI__builtin_operator_new:
5615 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5616 case Builtin::BI__builtin_operator_delete:
5618 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5619 return RValue::get(nullptr);
5620
5621 case Builtin::BI__builtin_is_aligned:
5622 return EmitBuiltinIsAligned(E);
5623 case Builtin::BI__builtin_align_up:
5624 return EmitBuiltinAlignTo(E, true);
5625 case Builtin::BI__builtin_align_down:
5626 return EmitBuiltinAlignTo(E, false);
5627
5628 case Builtin::BI__noop:
5629 // __noop always evaluates to an integer literal zero.
5630 return RValue::get(ConstantInt::get(IntTy, 0));
5631 case Builtin::BI__builtin_call_with_static_chain: {
5632 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5633 const Expr *Chain = E->getArg(1);
5634 return EmitCall(Call->getCallee()->getType(),
5635 EmitCallee(Call->getCallee()), Call, ReturnValue,
5636 EmitScalarExpr(Chain));
5637 }
5638 case Builtin::BI_InterlockedExchange8:
5639 case Builtin::BI_InterlockedExchange16:
5640 case Builtin::BI_InterlockedExchange:
5641 case Builtin::BI_InterlockedExchangePointer:
5642 return RValue::get(
5643 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5644 case Builtin::BI_InterlockedCompareExchangePointer:
5645 return RValue::get(
5646 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5647 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5648 return RValue::get(
5649 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5650 case Builtin::BI_InterlockedCompareExchange8:
5651 case Builtin::BI_InterlockedCompareExchange16:
5652 case Builtin::BI_InterlockedCompareExchange:
5653 case Builtin::BI_InterlockedCompareExchange64:
5655 case Builtin::BI_InterlockedIncrement16:
5656 case Builtin::BI_InterlockedIncrement:
5657 return RValue::get(
5658 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5659 case Builtin::BI_InterlockedDecrement16:
5660 case Builtin::BI_InterlockedDecrement:
5661 return RValue::get(
5662 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5663 case Builtin::BI_InterlockedAnd8:
5664 case Builtin::BI_InterlockedAnd16:
5665 case Builtin::BI_InterlockedAnd:
5666 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5667 case Builtin::BI_InterlockedExchangeAdd8:
5668 case Builtin::BI_InterlockedExchangeAdd16:
5669 case Builtin::BI_InterlockedExchangeAdd:
5670 return RValue::get(
5671 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5672 case Builtin::BI_InterlockedExchangeSub8:
5673 case Builtin::BI_InterlockedExchangeSub16:
5674 case Builtin::BI_InterlockedExchangeSub:
5675 return RValue::get(
5676 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5677 case Builtin::BI_InterlockedOr8:
5678 case Builtin::BI_InterlockedOr16:
5679 case Builtin::BI_InterlockedOr:
5680 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5681 case Builtin::BI_InterlockedXor8:
5682 case Builtin::BI_InterlockedXor16:
5683 case Builtin::BI_InterlockedXor:
5684 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5685
5686 case Builtin::BI_bittest64:
5687 case Builtin::BI_bittest:
5688 case Builtin::BI_bittestandcomplement64:
5689 case Builtin::BI_bittestandcomplement:
5690 case Builtin::BI_bittestandreset64:
5691 case Builtin::BI_bittestandreset:
5692 case Builtin::BI_bittestandset64:
5693 case Builtin::BI_bittestandset:
5694 case Builtin::BI_interlockedbittestandreset:
5695 case Builtin::BI_interlockedbittestandreset64:
5696 case Builtin::BI_interlockedbittestandset64:
5697 case Builtin::BI_interlockedbittestandset:
5698 case Builtin::BI_interlockedbittestandset_acq:
5699 case Builtin::BI_interlockedbittestandset_rel:
5700 case Builtin::BI_interlockedbittestandset_nf:
5701 case Builtin::BI_interlockedbittestandreset_acq:
5702 case Builtin::BI_interlockedbittestandreset_rel:
5703 case Builtin::BI_interlockedbittestandreset_nf:
5704 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5705
5706 // These builtins exist to emit regular volatile loads and stores not
5707 // affected by the -fms-volatile setting.
5708 case Builtin::BI__iso_volatile_load8:
5709 case Builtin::BI__iso_volatile_load16:
5710 case Builtin::BI__iso_volatile_load32:
5711 case Builtin::BI__iso_volatile_load64:
5712 return RValue::get(EmitISOVolatileLoad(*this, E));
5713 case Builtin::BI__iso_volatile_store8:
5714 case Builtin::BI__iso_volatile_store16:
5715 case Builtin::BI__iso_volatile_store32:
5716 case Builtin::BI__iso_volatile_store64:
5717 return RValue::get(EmitISOVolatileStore(*this, E));
5718
5719 case Builtin::BI__builtin_ptrauth_sign_constant:
5720 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5721
5722 case Builtin::BI__builtin_ptrauth_auth:
5723 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5724 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5725 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5726 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5727 case Builtin::BI__builtin_ptrauth_strip: {
5728 // Emit the arguments.
5730 for (auto argExpr : E->arguments())
5731 Args.push_back(EmitScalarExpr(argExpr));
5732
5733 // Cast the value to intptr_t, saving its original type.
5734 llvm::Type *OrigValueType = Args[0]->getType();
5735 if (OrigValueType->isPointerTy())
5736 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5737
5738 switch (BuiltinID) {
5739 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5740 if (Args[4]->getType()->isPointerTy())
5741 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5742 [[fallthrough]];
5743
5744 case Builtin::BI__builtin_ptrauth_auth:
5745 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5746 if (Args[2]->getType()->isPointerTy())
5747 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5748 break;
5749
5750 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5751 if (Args[1]->getType()->isPointerTy())
5752 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5753 break;
5754
5755 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5756 case Builtin::BI__builtin_ptrauth_strip:
5757 break;
5758 }
5759
5760 // Call the intrinsic.
5761 auto IntrinsicID = [&]() -> unsigned {
5762 switch (BuiltinID) {
5763 case Builtin::BI__builtin_ptrauth_auth:
5764 return llvm::Intrinsic::ptrauth_auth;
5765 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5766 return llvm::Intrinsic::ptrauth_resign;
5767 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5768 return llvm::Intrinsic::ptrauth_blend;
5769 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5770 return llvm::Intrinsic::ptrauth_sign_generic;
5771 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5772 return llvm::Intrinsic::ptrauth_sign;
5773 case Builtin::BI__builtin_ptrauth_strip:
5774 return llvm::Intrinsic::ptrauth_strip;
5775 }
5776 llvm_unreachable("bad ptrauth intrinsic");
5777 }();
5778 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5779 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5780
5781 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5782 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5783 OrigValueType->isPointerTy()) {
5784 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5785 }
5786 return RValue::get(Result);
5787 }
5788
5789 case Builtin::BI__exception_code:
5790 case Builtin::BI_exception_code:
5792 case Builtin::BI__exception_info:
5793 case Builtin::BI_exception_info:
5795 case Builtin::BI__abnormal_termination:
5796 case Builtin::BI_abnormal_termination:
5798 case Builtin::BI_setjmpex:
5799 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5800 E->getArg(0)->getType()->isPointerType())
5801 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5802 break;
5803 case Builtin::BI_setjmp:
5804 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5805 E->getArg(0)->getType()->isPointerType()) {
5806 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5807 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5808 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5809 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5810 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5811 }
5812 break;
5813
5814 // C++ std:: builtins.
5815 case Builtin::BImove:
5816 case Builtin::BImove_if_noexcept:
5817 case Builtin::BIforward:
5818 case Builtin::BIforward_like:
5819 case Builtin::BIas_const:
5820 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5821 case Builtin::BI__GetExceptionInfo: {
5822 if (llvm::GlobalVariable *GV =
5824 return RValue::get(GV);
5825 break;
5826 }
5827
5828 case Builtin::BI__fastfail:
5829 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5830
5831 case Builtin::BI__builtin_coro_id:
5832 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5833 case Builtin::BI__builtin_coro_promise:
5834 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5835 case Builtin::BI__builtin_coro_resume:
5836 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5837 return RValue::get(nullptr);
5838 case Builtin::BI__builtin_coro_frame:
5839 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5840 case Builtin::BI__builtin_coro_noop:
5841 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5842 case Builtin::BI__builtin_coro_free:
5843 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5844 case Builtin::BI__builtin_coro_destroy:
5845 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5846 return RValue::get(nullptr);
5847 case Builtin::BI__builtin_coro_done:
5848 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5849 case Builtin::BI__builtin_coro_alloc:
5850 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5851 case Builtin::BI__builtin_coro_begin:
5852 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5853 case Builtin::BI__builtin_coro_end:
5854 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5855 case Builtin::BI__builtin_coro_suspend:
5856 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5857 case Builtin::BI__builtin_coro_size:
5858 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5859 case Builtin::BI__builtin_coro_align:
5860 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5861
5862 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5863 case Builtin::BIread_pipe:
5864 case Builtin::BIwrite_pipe: {
5865 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5866 *Arg1 = EmitScalarExpr(E->getArg(1));
5867 CGOpenCLRuntime OpenCLRT(CGM);
5868 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5869 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5870
5871 // Type of the generic packet parameter.
5872 unsigned GenericAS =
5874 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5875
5876 // Testing which overloaded version we should generate the call for.
5877 if (2U == E->getNumArgs()) {
5878 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5879 : "__write_pipe_2";
5880 // Creating a generic function type to be able to call with any builtin or
5881 // user defined type.
5882 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5883 llvm::FunctionType *FTy = llvm::FunctionType::get(
5884 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5885 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
5886 return RValue::get(
5888 {Arg0, ACast, PacketSize, PacketAlign}));
5889 } else {
5890 assert(4 == E->getNumArgs() &&
5891 "Illegal number of parameters to pipe function");
5892 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5893 : "__write_pipe_4";
5894
5895 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5896 Int32Ty, Int32Ty};
5897 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5898 *Arg3 = EmitScalarExpr(E->getArg(3));
5899 llvm::FunctionType *FTy = llvm::FunctionType::get(
5900 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5901 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
5902 // We know the third argument is an integer type, but we may need to cast
5903 // it to i32.
5904 if (Arg2->getType() != Int32Ty)
5905 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5906 return RValue::get(
5908 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
5909 }
5910 }
5911 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5912 // functions
5913 case Builtin::BIreserve_read_pipe:
5914 case Builtin::BIreserve_write_pipe:
5915 case Builtin::BIwork_group_reserve_read_pipe:
5916 case Builtin::BIwork_group_reserve_write_pipe:
5917 case Builtin::BIsub_group_reserve_read_pipe:
5918 case Builtin::BIsub_group_reserve_write_pipe: {
5919 // Composing the mangled name for the function.
5920 const char *Name;
5921 if (BuiltinID == Builtin::BIreserve_read_pipe)
5922 Name = "__reserve_read_pipe";
5923 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5924 Name = "__reserve_write_pipe";
5925 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5926 Name = "__work_group_reserve_read_pipe";
5927 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5928 Name = "__work_group_reserve_write_pipe";
5929 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5930 Name = "__sub_group_reserve_read_pipe";
5931 else
5932 Name = "__sub_group_reserve_write_pipe";
5933
5934 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5935 *Arg1 = EmitScalarExpr(E->getArg(1));
5936 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5937 CGOpenCLRuntime OpenCLRT(CGM);
5938 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5939 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5940
5941 // Building the generic function prototype.
5942 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5943 llvm::FunctionType *FTy = llvm::FunctionType::get(
5944 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5945 // We know the second argument is an integer type, but we may need to cast
5946 // it to i32.
5947 if (Arg1->getType() != Int32Ty)
5948 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5950 {Arg0, Arg1, PacketSize, PacketAlign}));
5951 }
5952 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5953 // functions
5954 case Builtin::BIcommit_read_pipe:
5955 case Builtin::BIcommit_write_pipe:
5956 case Builtin::BIwork_group_commit_read_pipe:
5957 case Builtin::BIwork_group_commit_write_pipe:
5958 case Builtin::BIsub_group_commit_read_pipe:
5959 case Builtin::BIsub_group_commit_write_pipe: {
5960 const char *Name;
5961 if (BuiltinID == Builtin::BIcommit_read_pipe)
5962 Name = "__commit_read_pipe";
5963 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5964 Name = "__commit_write_pipe";
5965 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5966 Name = "__work_group_commit_read_pipe";
5967 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5968 Name = "__work_group_commit_write_pipe";
5969 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5970 Name = "__sub_group_commit_read_pipe";
5971 else
5972 Name = "__sub_group_commit_write_pipe";
5973
5974 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5975 *Arg1 = EmitScalarExpr(E->getArg(1));
5976 CGOpenCLRuntime OpenCLRT(CGM);
5977 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5978 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5979
5980 // Building the generic function prototype.
5981 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5982 llvm::FunctionType *FTy =
5983 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5984 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5985
5987 {Arg0, Arg1, PacketSize, PacketAlign}));
5988 }
5989 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5990 case Builtin::BIget_pipe_num_packets:
5991 case Builtin::BIget_pipe_max_packets: {
5992 const char *BaseName;
5993 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5994 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5995 BaseName = "__get_pipe_num_packets";
5996 else
5997 BaseName = "__get_pipe_max_packets";
5998 std::string Name = std::string(BaseName) +
5999 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6000
6001 // Building the generic function prototype.
6002 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6003 CGOpenCLRuntime OpenCLRT(CGM);
6004 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6005 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6006 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6007 llvm::FunctionType *FTy = llvm::FunctionType::get(
6008 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6009
6011 {Arg0, PacketSize, PacketAlign}));
6012 }
6013
6014 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6015 case Builtin::BIto_global:
6016 case Builtin::BIto_local:
6017 case Builtin::BIto_private: {
6018 auto Arg0 = EmitScalarExpr(E->getArg(0));
6019 auto NewArgT = llvm::PointerType::get(
6022 auto NewRetT = llvm::PointerType::get(
6026 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6027 llvm::Value *NewArg;
6028 if (Arg0->getType()->getPointerAddressSpace() !=
6029 NewArgT->getPointerAddressSpace())
6030 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6031 else
6032 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6033 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6034 auto NewCall =
6035 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6036 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6037 ConvertType(E->getType())));
6038 }
6039
6040 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6041 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6042 // The code below expands the builtin call to a call to one of the following
6043 // functions that an OpenCL runtime library will have to provide:
6044 // __enqueue_kernel_basic
6045 // __enqueue_kernel_varargs
6046 // __enqueue_kernel_basic_events
6047 // __enqueue_kernel_events_varargs
6048 case Builtin::BIenqueue_kernel: {
6049 StringRef Name; // Generated function call name
6050 unsigned NumArgs = E->getNumArgs();
6051
6052 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6053 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6054 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6055
6056 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6057 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6058 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6059 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6060 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6061
6062 if (NumArgs == 4) {
6063 // The most basic form of the call with parameters:
6064 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6065 Name = "__enqueue_kernel_basic";
6066 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6067 GenericVoidPtrTy};
6068 llvm::FunctionType *FTy = llvm::FunctionType::get(
6069 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6070
6071 auto Info =
6072 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6073 llvm::Value *Kernel =
6074 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6075 llvm::Value *Block =
6076 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6077
6078 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6079 {Queue, Flags, Range, Kernel, Block});
6080 return RValue::get(RTCall);
6081 }
6082 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6083
6084 // Create a temporary array to hold the sizes of local pointer arguments
6085 // for the block. \p First is the position of the first size argument.
6086 auto CreateArrayForSizeVar = [=](unsigned First)
6087 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6088 llvm::APInt ArraySize(32, NumArgs - First);
6090 getContext().getSizeType(), ArraySize, nullptr,
6092 /*IndexTypeQuals=*/0);
6093 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6094 llvm::Value *TmpPtr = Tmp.getPointer();
6095 // The EmitLifetime* pair expects a naked Alloca as its last argument;
6096 // however, for cases where the default AS is not the Alloca AS, Tmp is
6097 // actually the Alloca ascasted to the default AS, hence the
6098 // stripPointerCasts().
6099 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6100 llvm::Value *TmpSize = EmitLifetimeStart(
6101 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6102 llvm::Value *ElemPtr;
6103 // Each of the following arguments specifies the size of the corresponding
6104 // argument passed to the enqueued block.
6105 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6106 for (unsigned I = First; I < NumArgs; ++I) {
6107 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6108 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6109 {Zero, Index});
6110 if (I == First)
6111 ElemPtr = GEP;
6112 auto *V =
6113 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6115 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6116 }
6117 // Return the Alloca itself rather than a potential ascast as this is only
6118 // used by the paired EmitLifetimeEnd.
6119 return std::tie(ElemPtr, TmpSize, Alloca);
6120 };
6121
6122 // Could have events and/or varargs.
6123 if (E->getArg(3)->getType()->isBlockPointerType()) {
6124 // No events passed, but has variadic arguments.
6125 Name = "__enqueue_kernel_varargs";
6126 auto Info =
6127 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6128 llvm::Value *Kernel =
6129 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6130 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6131 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6132 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6133
6134 // Create a vector of the arguments, as well as a constant value to
6135 // express to the runtime the number of variadic arguments.
6136 llvm::Value *const Args[] = {Queue, Flags,
6137 Range, Kernel,
6138 Block, ConstantInt::get(IntTy, NumArgs - 4),
6139 ElemPtr};
6140 llvm::Type *const ArgTys[] = {
6141 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6142 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6143
6144 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6145 auto Call = RValue::get(
6146 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6147 if (TmpSize)
6148 EmitLifetimeEnd(TmpSize, TmpPtr);
6149 return Call;
6150 }
6151 // Any calls now have event arguments passed.
6152 if (NumArgs >= 7) {
6153 llvm::PointerType *PtrTy = llvm::PointerType::get(
6156
6157 llvm::Value *NumEvents =
6158 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6159
6160 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6161 // to be a null pointer constant (including `0` literal), we can take it
6162 // into account and emit null pointer directly.
6163 llvm::Value *EventWaitList = nullptr;
6164 if (E->getArg(4)->isNullPointerConstant(
6166 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6167 } else {
6168 EventWaitList =
6169 E->getArg(4)->getType()->isArrayType()
6170 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6171 : EmitScalarExpr(E->getArg(4));
6172 // Convert to generic address space.
6173 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6174 }
6175 llvm::Value *EventRet = nullptr;
6176 if (E->getArg(5)->isNullPointerConstant(
6178 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6179 } else {
6180 EventRet =
6181 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6182 }
6183
6184 auto Info =
6185 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6186 llvm::Value *Kernel =
6187 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6188 llvm::Value *Block =
6189 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6190
6191 std::vector<llvm::Type *> ArgTys = {
6192 QueueTy, Int32Ty, RangeTy, Int32Ty,
6193 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6194
6195 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6196 NumEvents, EventWaitList, EventRet,
6197 Kernel, Block};
6198
6199 if (NumArgs == 7) {
6200 // Has events but no variadics.
6201 Name = "__enqueue_kernel_basic_events";
6202 llvm::FunctionType *FTy = llvm::FunctionType::get(
6203 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6204 return RValue::get(
6207 }
6208 // Has event info and variadics
6209 // Pass the number of variadics to the runtime function too.
6210 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6211 ArgTys.push_back(Int32Ty);
6212 Name = "__enqueue_kernel_events_varargs";
6213
6214 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6215 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6216 Args.push_back(ElemPtr);
6217 ArgTys.push_back(ElemPtr->getType());
6218
6219 llvm::FunctionType *FTy = llvm::FunctionType::get(
6220 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6221 auto Call =
6224 if (TmpSize)
6225 EmitLifetimeEnd(TmpSize, TmpPtr);
6226 return Call;
6227 }
6228 llvm_unreachable("Unexpected enqueue_kernel signature");
6229 }
6230 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6231 // parameter.
6232 case Builtin::BIget_kernel_work_group_size: {
6233 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6234 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6235 auto Info =
6236 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6237 Value *Kernel =
6238 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6239 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6242 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6243 false),
6244 "__get_kernel_work_group_size_impl"),
6245 {Kernel, Arg}));
6246 }
6247 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6248 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6249 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6250 auto Info =
6251 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6252 Value *Kernel =
6253 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6254 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6257 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6258 false),
6259 "__get_kernel_preferred_work_group_size_multiple_impl"),
6260 {Kernel, Arg}));
6261 }
6262 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6263 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6264 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6265 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6266 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6267 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6268 auto Info =
6269 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6270 Value *Kernel =
6271 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6272 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6273 const char *Name =
6274 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6275 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6276 : "__get_kernel_sub_group_count_for_ndrange_impl";
6279 llvm::FunctionType::get(
6280 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6281 false),
6282 Name),
6283 {NDRange, Kernel, Block}));
6284 }
6285 case Builtin::BI__builtin_store_half:
6286 case Builtin::BI__builtin_store_halff: {
6287 Value *Val = EmitScalarExpr(E->getArg(0));
6289 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6290 Builder.CreateStore(HalfVal, Address);
6291 return RValue::get(nullptr);
6292 }
6293 case Builtin::BI__builtin_load_half: {
6295 Value *HalfVal = Builder.CreateLoad(Address);
6296 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6297 }
6298 case Builtin::BI__builtin_load_halff: {
6300 Value *HalfVal = Builder.CreateLoad(Address);
6301 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6302 }
6303 case Builtin::BI__builtin_printf:
6304 case Builtin::BIprintf:
6305 if (getTarget().getTriple().isNVPTX() ||
6306 getTarget().getTriple().isAMDGCN() ||
6307 (getTarget().getTriple().isSPIRV() &&
6308 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6309 if (getTarget().getTriple().isNVPTX())
6311 if ((getTarget().getTriple().isAMDGCN() ||
6312 getTarget().getTriple().isSPIRV()) &&
6313 getLangOpts().HIP)
6315 }
6316
6317 break;
6318 case Builtin::BI__builtin_canonicalize:
6319 case Builtin::BI__builtin_canonicalizef:
6320 case Builtin::BI__builtin_canonicalizef16:
6321 case Builtin::BI__builtin_canonicalizel:
6322 return RValue::get(
6323 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6324
6325 case Builtin::BI__builtin_thread_pointer: {
6326 if (!getContext().getTargetInfo().isTLSSupported())
6327 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6328 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6329 break;
6330 }
6331 case Builtin::BI__builtin_os_log_format:
6332 return emitBuiltinOSLogFormat(*E);
6333
6334 case Builtin::BI__xray_customevent: {
6336 return RValue::getIgnored();
6337
6340 return RValue::getIgnored();
6341
6342 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6343 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6344 return RValue::getIgnored();
6345
6346 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6347 auto FTy = F->getFunctionType();
6348 auto Arg0 = E->getArg(0);
6349 auto Arg0Val = EmitScalarExpr(Arg0);
6350 auto Arg0Ty = Arg0->getType();
6351 auto PTy0 = FTy->getParamType(0);
6352 if (PTy0 != Arg0Val->getType()) {
6353 if (Arg0Ty->isArrayType())
6354 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6355 else
6356 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6357 }
6358 auto Arg1 = EmitScalarExpr(E->getArg(1));
6359 auto PTy1 = FTy->getParamType(1);
6360 if (PTy1 != Arg1->getType())
6361 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6362 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6363 }
6364
6365 case Builtin::BI__xray_typedevent: {
6366 // TODO: There should be a way to always emit events even if the current
6367 // function is not instrumented. Losing events in a stream can cripple
6368 // a trace.
6370 return RValue::getIgnored();
6371
6374 return RValue::getIgnored();
6375
6376 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6377 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6378 return RValue::getIgnored();
6379
6380 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6381 auto FTy = F->getFunctionType();
6382 auto Arg0 = EmitScalarExpr(E->getArg(0));
6383 auto PTy0 = FTy->getParamType(0);
6384 if (PTy0 != Arg0->getType())
6385 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6386 auto Arg1 = E->getArg(1);
6387 auto Arg1Val = EmitScalarExpr(Arg1);
6388 auto Arg1Ty = Arg1->getType();
6389 auto PTy1 = FTy->getParamType(1);
6390 if (PTy1 != Arg1Val->getType()) {
6391 if (Arg1Ty->isArrayType())
6392 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6393 else
6394 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6395 }
6396 auto Arg2 = EmitScalarExpr(E->getArg(2));
6397 auto PTy2 = FTy->getParamType(2);
6398 if (PTy2 != Arg2->getType())
6399 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6400 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6401 }
6402
6403 case Builtin::BI__builtin_ms_va_start:
6404 case Builtin::BI__builtin_ms_va_end:
6405 return RValue::get(
6407 BuiltinID == Builtin::BI__builtin_ms_va_start));
6408
6409 case Builtin::BI__builtin_ms_va_copy: {
6410 // Lower this manually. We can't reliably determine whether or not any
6411 // given va_copy() is for a Win64 va_list from the calling convention
6412 // alone, because it's legal to do this from a System V ABI function.
6413 // With opaque pointer types, we won't have enough information in LLVM
6414 // IR to determine this from the argument types, either. Best to do it
6415 // now, while we have enough information.
6416 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6417 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6418
6419 DestAddr = DestAddr.withElementType(Int8PtrTy);
6420 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6421
6422 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6423 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6424 }
6425
6426 case Builtin::BI__builtin_get_device_side_mangled_name: {
6427 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6428 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6429 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6430 return RValue::get(Str.getPointer());
6431 }
6432 }
6433
6434 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6435 // the call using the normal call path, but using the unmangled
6436 // version of the function name.
6437 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6438 return emitLibraryCall(*this, FD, E,
6439 CGM.getBuiltinLibFunction(FD, BuiltinID));
6440
6441 // If this is a predefined lib function (e.g. malloc), emit the call
6442 // using exactly the normal call path.
6443 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6444 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6445
6446 // Check that a call to a target specific builtin has the correct target
6447 // features.
6448 // This is down here to avoid non-target specific builtins, however, if
6449 // generic builtins start to require generic target features then we
6450 // can move this up to the beginning of the function.
6452
6453 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6454 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6455
6456 // See if we have a target specific intrinsic.
6457 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6458 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6459 StringRef Prefix =
6460 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6461 if (!Prefix.empty()) {
6462 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6463 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6464 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6465 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6466 // NOTE: we don't need to perform a compatibility flag check here since the
6467 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
6468 // the MS builtins via ALL_MS_LANGUAGES, and are filtered earlier.
6469 if (IntrinsicID == Intrinsic::not_intrinsic)
6470 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6471 }
6472
6473 if (IntrinsicID != Intrinsic::not_intrinsic) {
6475
6476 // Find out if any arguments are required to be integer constant
6477 // expressions.
6478 unsigned ICEArguments = 0;
6480 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6481 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6482
6483 Function *F = CGM.getIntrinsic(IntrinsicID);
6484 llvm::FunctionType *FTy = F->getFunctionType();
6485
6486 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6487 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6488 // If the intrinsic arg type is different from the builtin arg type
6489 // we need to do a bit cast.
6490 llvm::Type *PTy = FTy->getParamType(i);
6491 if (PTy != ArgValue->getType()) {
6492 // XXX - vector of pointers?
6493 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6494 if (PtrTy->getAddressSpace() !=
6495 ArgValue->getType()->getPointerAddressSpace()) {
6496 ArgValue = Builder.CreateAddrSpaceCast(
6497 ArgValue, llvm::PointerType::get(getLLVMContext(),
6498 PtrTy->getAddressSpace()));
6499 }
6500 }
6501
6502 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6503 // in amx intrinsics.
6504 if (PTy->isX86_AMXTy())
6505 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6506 {ArgValue->getType()}, {ArgValue});
6507 else
6508 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6509 }
6510
6511 Args.push_back(ArgValue);
6512 }
6513
6514 Value *V = Builder.CreateCall(F, Args);
6515 QualType BuiltinRetType = E->getType();
6516
6517 llvm::Type *RetTy = VoidTy;
6518 if (!BuiltinRetType->isVoidType())
6519 RetTy = ConvertType(BuiltinRetType);
6520
6521 if (RetTy != V->getType()) {
6522 // XXX - vector of pointers?
6523 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6524 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6526 V, llvm::PointerType::get(getLLVMContext(),
6527 PtrTy->getAddressSpace()));
6528 }
6529 }
6530
6531 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6532 // in amx intrinsics.
6533 if (V->getType()->isX86_AMXTy())
6534 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6535 {V});
6536 else
6537 V = Builder.CreateBitCast(V, RetTy);
6538 }
6539
6540 if (RetTy->isVoidTy())
6541 return RValue::get(nullptr);
6542
6543 return RValue::get(V);
6544 }
6545
6546 // Some target-specific builtins can have aggregate return values, e.g.
6547 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6548 // ReturnValue to be non-null, so that the target-specific emission code can
6549 // always just emit into it.
6551 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6552 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6553 ReturnValue = ReturnValueSlot(DestPtr, false);
6554 }
6555
6556 // Now see if we can emit a target-specific builtin.
6557 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6558 switch (EvalKind) {
6559 case TEK_Scalar:
6560 if (V->getType()->isVoidTy())
6561 return RValue::get(nullptr);
6562 return RValue::get(V);
6563 case TEK_Aggregate:
6564 return RValue::getAggregate(ReturnValue.getAddress(),
6565 ReturnValue.isVolatile());
6566 case TEK_Complex:
6567 llvm_unreachable("No current target builtin returns complex");
6568 }
6569 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6570 }
6571
6572 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6573 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6574 switch (EvalKind) {
6575 case TEK_Scalar:
6576 if (V->getType()->isVoidTy())
6577 return RValue::get(nullptr);
6578 return RValue::get(V);
6579 case TEK_Aggregate:
6580 return RValue::getAggregate(ReturnValue.getAddress(),
6581 ReturnValue.isVolatile());
6582 case TEK_Complex:
6583 llvm_unreachable("No current hlsl builtin returns complex");
6584 }
6585 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6586 }
6587
6588 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6589 return EmitHipStdParUnsupportedBuiltin(this, FD);
6590
6591 ErrorUnsupported(E, "builtin function");
6592
6593 // Unknown builtin, for now just dump it out and return undef.
6594 return GetUndefRValue(E->getType());
6595}
6596
6598 unsigned BuiltinID, const CallExpr *E,
6599 ReturnValueSlot ReturnValue,
6600 llvm::Triple::ArchType Arch) {
6601 // When compiling in HipStdPar mode we have to be conservative in rejecting
6602 // target specific features in the FE, and defer the possible error to the
6603 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6604 // referenced by an accelerator executable function, we emit an error.
6605 // Returning nullptr here leads to the builtin being handled in
6606 // EmitStdParUnsupportedBuiltin.
6607 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6608 Arch != CGF->getTarget().getTriple().getArch())
6609 return nullptr;
6610
6611 switch (Arch) {
6612 case llvm::Triple::arm:
6613 case llvm::Triple::armeb:
6614 case llvm::Triple::thumb:
6615 case llvm::Triple::thumbeb:
6616 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6617 case llvm::Triple::aarch64:
6618 case llvm::Triple::aarch64_32:
6619 case llvm::Triple::aarch64_be:
6620 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6621 case llvm::Triple::bpfeb:
6622 case llvm::Triple::bpfel:
6623 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6624 case llvm::Triple::x86:
6625 case llvm::Triple::x86_64:
6626 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6627 case llvm::Triple::ppc:
6628 case llvm::Triple::ppcle:
6629 case llvm::Triple::ppc64:
6630 case llvm::Triple::ppc64le:
6631 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6632 case llvm::Triple::r600:
6633 case llvm::Triple::amdgcn:
6634 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6635 case llvm::Triple::systemz:
6636 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6637 case llvm::Triple::nvptx:
6638 case llvm::Triple::nvptx64:
6639 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6640 case llvm::Triple::wasm32:
6641 case llvm::Triple::wasm64:
6642 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6643 case llvm::Triple::hexagon:
6644 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6645 case llvm::Triple::riscv32:
6646 case llvm::Triple::riscv64:
6647 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6648 case llvm::Triple::spirv:
6649 return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
6650 case llvm::Triple::spirv64:
6651 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6652 return nullptr;
6653 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6654 default:
6655 return nullptr;
6656 }
6657}
6658
6660 const CallExpr *E,
6661 ReturnValueSlot ReturnValue) {
6662 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6663 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6665 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6666 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6667 }
6668
6669 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6670 getTarget().getTriple().getArch());
6671}
6672
6673static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6674 NeonTypeFlags TypeFlags,
6675 bool HasLegalHalfType = true,
6676 bool V1Ty = false,
6677 bool AllowBFloatArgsAndRet = true) {
6678 int IsQuad = TypeFlags.isQuad();
6679 switch (TypeFlags.getEltType()) {
6682 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6685 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6687 if (AllowBFloatArgsAndRet)
6688 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6689 else
6690 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6692 if (HasLegalHalfType)
6693 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6694 else
6695 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6697 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6700 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6702 // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
6703 // There is a lot of i128 and f128 API missing.
6704 // so we use v16i8 to represent poly128 and get pattern matched.
6705 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6707 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6709 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6710 }
6711 llvm_unreachable("Unknown vector element type!");
6712}
6713
6714static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6715 NeonTypeFlags IntTypeFlags) {
6716 int IsQuad = IntTypeFlags.isQuad();
6717 switch (IntTypeFlags.getEltType()) {
6719 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6721 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6723 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6724 default:
6725 llvm_unreachable("Type can't be converted to floating-point!");
6726 }
6727}
6728
6730 const ElementCount &Count) {
6731 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6732 return Builder.CreateShuffleVector(V, V, SV, "lane");
6733}
6734
6736 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6737 return EmitNeonSplat(V, C, EC);
6738}
6739
6741 const char *name,
6742 unsigned shift, bool rightshift) {
6743 unsigned j = 0;
6744 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6745 ai != ae; ++ai, ++j) {
6746 if (F->isConstrainedFPIntrinsic())
6747 if (ai->getType()->isMetadataTy())
6748 continue;
6749 if (shift > 0 && shift == j)
6750 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6751 else
6752 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6753 }
6754
6755 if (F->isConstrainedFPIntrinsic())
6756 return Builder.CreateConstrainedFPCall(F, Ops, name);
6757 else
6758 return Builder.CreateCall(F, Ops, name);
6759}
6760
6762 bool neg) {
6763 int SV = cast<ConstantInt>(V)->getSExtValue();
6764 return ConstantInt::get(Ty, neg ? -SV : SV);
6765}
6766
6767// Right-shift a vector by a constant.
6769 llvm::Type *Ty, bool usgn,
6770 const char *name) {
6771 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6772
6773 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6774 int EltSize = VTy->getScalarSizeInBits();
6775
6776 Vec = Builder.CreateBitCast(Vec, Ty);
6777
6778 // lshr/ashr are undefined when the shift amount is equal to the vector
6779 // element size.
6780 if (ShiftAmt == EltSize) {
6781 if (usgn) {
6782 // Right-shifting an unsigned value by its size yields 0.
6783 return llvm::ConstantAggregateZero::get(VTy);
6784 } else {
6785 // Right-shifting a signed value by its size is equivalent
6786 // to a shift of size-1.
6787 --ShiftAmt;
6788 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6789 }
6790 }
6791
6792 Shift = EmitNeonShiftVector(Shift, Ty, false);
6793 if (usgn)
6794 return Builder.CreateLShr(Vec, Shift, name);
6795 else
6796 return Builder.CreateAShr(Vec, Shift, name);
6797}
6798
// Bit flags combined into the TypeModifier column of the NEON intrinsic
// tables below (see the NEONMAP1/NEONMAP2 rows).
// NOTE(review): the per-flag meanings are inferred from the names only;
// confirm against the code that consumes TypeModifier.
enum {
  AddRetType = (1 << 0),
  Add1ArgType = (1 << 1),
  Add2ArgTypes = (1 << 2),

  VectorizeRetType = (1 << 3),
  VectorizeArgTypes = (1 << 4),

  InventFloatType = (1 << 5),
  UnsignedAlts = (1 << 6),

  Use64BitVectors = (1 << 7),
  Use128BitVectors = (1 << 8),

  // Commonly used combinations.
  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  VectorRet = AddRetType | VectorizeRetType,
  VectorRetGetArgs01 =
      AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  FpCmpzModifiers =
      AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
};
6820
namespace {
/// One row of a NEON builtin -> LLVM intrinsic table (built with the
/// NEONMAP0/1/2 macros).
struct ARMVectorIntrinsicInfo {
  const char *NameHint;      ///< Stringified builtin base name.
  unsigned BuiltinID;        ///< NEON::BI__builtin_neon_* identifier.
  unsigned LLVMIntrinsic;    ///< Primary intrinsic ID, or 0 if none.
  unsigned AltLLVMIntrinsic; ///< Alternate intrinsic ID, or 0 if none.
  uint64_t TypeModifier;     ///< Bitwise OR of the type-modifier flags.

  /// Ordering against a bare builtin ID, so a table can be searched by ID.
  bool operator<(unsigned RHSBuiltinID) const {
    return BuiltinID < RHSBuiltinID;
  }
  /// Ordering between rows: by BuiltinID only.
  bool operator<(const ARMVectorIntrinsicInfo &TE) const {
    return BuiltinID < TE.BuiltinID;
  }
};
} // end anonymous namespace
6837
// Row builders for the ARMVectorIntrinsicInfo tables.
//
// NEONMAP0: builtin with no associated LLVM intrinsic and no type modifier.
#define NEONMAP0(NameBase) \
  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }

// NEONMAP1: builtin with a single LLVM intrinsic.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, TypeModifier }

// NEONMAP2: builtin with a primary and an alternate LLVM intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      TypeModifier }
6849
6850static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6851 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6852 NEONMAP0(splat_lane_v),
6853 NEONMAP0(splat_laneq_v),
6854 NEONMAP0(splatq_lane_v),
6855 NEONMAP0(splatq_laneq_v),
6856 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6857 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6858 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6859 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6860 NEONMAP0(vadd_v),
6861 NEONMAP0(vaddhn_v),
6862 NEONMAP0(vaddq_v),
6863 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6864 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6865 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6866 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6867 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6868 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6869 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6870 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6871 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6872 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6873 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6874 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6875 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6876 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6877 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6878 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6879 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6880 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6881 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6882 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6883 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6884 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6885 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6886 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6887 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6888 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6889 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6890 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6891 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6892 NEONMAP0(vceqz_v),
6893 NEONMAP0(vceqzq_v),
6894 NEONMAP0(vcgez_v),
6895 NEONMAP0(vcgezq_v),
6896 NEONMAP0(vcgtz_v),
6897 NEONMAP0(vcgtzq_v),
6898 NEONMAP0(vclez_v),
6899 NEONMAP0(vclezq_v),
6900 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6901 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6902 NEONMAP0(vcltz_v),
6903 NEONMAP0(vcltzq_v),
6904 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6905 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6906 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6907 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6908 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6909 NEONMAP0(vcvt_f16_s16),
6910 NEONMAP0(vcvt_f16_u16),
6911 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6912 NEONMAP0(vcvt_f32_v),
6913 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6914 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6915 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6916 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6917 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6918 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6919 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6920 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6921 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6922 NEONMAP0(vcvt_s16_f16),
6923 NEONMAP0(vcvt_s32_v),
6924 NEONMAP0(vcvt_s64_v),
6925 NEONMAP0(vcvt_u16_f16),
6926 NEONMAP0(vcvt_u32_v),
6927 NEONMAP0(vcvt_u64_v),
6928 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6929 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6930 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6931 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6932 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6933 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6934 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6935 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6936 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6937 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6938 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6939 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6940 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6941 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6942 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6943 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6944 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6945 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6946 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6947 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6948 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6949 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6950 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6951 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6952 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6953 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6954 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6955 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6956 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6957 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6958 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6959 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6960 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6961 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6962 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6963 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6964 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6965 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6966 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6967 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6968 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6969 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6970 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6971 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6972 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6973 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6974 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6975 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6976 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6977 NEONMAP0(vcvtq_f16_s16),
6978 NEONMAP0(vcvtq_f16_u16),
6979 NEONMAP0(vcvtq_f32_v),
6980 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6981 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6982 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6983 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6984 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6985 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6986 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6987 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6988 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6989 NEONMAP0(vcvtq_s16_f16),
6990 NEONMAP0(vcvtq_s32_v),
6991 NEONMAP0(vcvtq_s64_v),
6992 NEONMAP0(vcvtq_u16_f16),
6993 NEONMAP0(vcvtq_u32_v),
6994 NEONMAP0(vcvtq_u64_v),
6995 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6996 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6997 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6998 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6999 NEONMAP0(vext_v),
7000 NEONMAP0(vextq_v),
7001 NEONMAP0(vfma_v),
7002 NEONMAP0(vfmaq_v),
7003 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7004 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7005 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7006 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7007 NEONMAP0(vld1_dup_v),
7008 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7009 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7010 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7011 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7012 NEONMAP0(vld1q_dup_v),
7013 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7014 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7015 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7016 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7017 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7018 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7019 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7020 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7021 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7022 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7023 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7024 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7025 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7026 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7027 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7028 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7029 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7030 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7031 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7032 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7033 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7034 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7035 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7036 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7037 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7038 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7039 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7040 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7041 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7042 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7043 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7044 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7045 NEONMAP0(vmovl_v),
7046 NEONMAP0(vmovn_v),
7047 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7048 NEONMAP0(vmull_v),
7049 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7050 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7051 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7052 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7053 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7054 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7055 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7056 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7057 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7058 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7059 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7060 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7061 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7062 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7063 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7064 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7065 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7066 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7067 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7068 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7069 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7070 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7071 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7072 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7073 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7074 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7075 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7076 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7077 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7078 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7079 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7080 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7081 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7082 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7083 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7084 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7085 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7086 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7087 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7088 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7089 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7090 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7091 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7092 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7093 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7094 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7095 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7096 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7097 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7098 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7099 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7100 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7101 NEONMAP0(vrndi_v),
7102 NEONMAP0(vrndiq_v),
7103 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7104 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7105 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7106 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7107 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7108 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7109 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7110 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7111 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7112 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7113 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7114 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7115 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7116 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7117 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7118 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7119 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7120 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7121 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7122 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7123 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7124 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7125 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7126 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7127 NEONMAP0(vshl_n_v),
7128 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7129 NEONMAP0(vshll_n_v),
7130 NEONMAP0(vshlq_n_v),
7131 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7132 NEONMAP0(vshr_n_v),
7133 NEONMAP0(vshrn_n_v),
7134 NEONMAP0(vshrq_n_v),
7135 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7136 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7137 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7138 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7139 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7140 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7141 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7142 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7143 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7144 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7145 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7146 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7147 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7148 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7149 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7150 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7151 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7152 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7153 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7154 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7155 NEONMAP0(vsubhn_v),
7156 NEONMAP0(vtrn_v),
7157 NEONMAP0(vtrnq_v),
7158 NEONMAP0(vtst_v),
7159 NEONMAP0(vtstq_v),
7160 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7161 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7162 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7163 NEONMAP0(vuzp_v),
7164 NEONMAP0(vuzpq_v),
7165 NEONMAP0(vzip_v),
7166 NEONMAP0(vzipq_v)
7167};
7168
7169static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7170 NEONMAP0(splat_lane_v),
7171 NEONMAP0(splat_laneq_v),
7172 NEONMAP0(splatq_lane_v),
7173 NEONMAP0(splatq_laneq_v),
7174 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7175 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7176 NEONMAP0(vadd_v),
7177 NEONMAP0(vaddhn_v),
7178 NEONMAP0(vaddq_p128),
7179 NEONMAP0(vaddq_v),
7180 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7181 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7182 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7183 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7184 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7185 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7186 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7187 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7188 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7189 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7190 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7191 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7192 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7193 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7194 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7195 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7196 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7197 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7198 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7199 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7200 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7201 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7202 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7203 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7204 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7205 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7206 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7207 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7208 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7209 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7210 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7211 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7212 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7213 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7214 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7215 NEONMAP0(vceqz_v),
7216 NEONMAP0(vceqzq_v),
7217 NEONMAP0(vcgez_v),
7218 NEONMAP0(vcgezq_v),
7219 NEONMAP0(vcgtz_v),
7220 NEONMAP0(vcgtzq_v),
7221 NEONMAP0(vclez_v),
7222 NEONMAP0(vclezq_v),
7223 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7224 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7225 NEONMAP0(vcltz_v),
7226 NEONMAP0(vcltzq_v),
7227 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7228 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7229 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7230 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7231 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7232 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7233 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7234 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7235 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7236 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7237 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7238 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7239 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7240 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7241 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7242 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7243 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7244 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7245 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7246 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7247 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7248 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7249 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7250 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7251 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7252 NEONMAP0(vcvt_f16_s16),
7253 NEONMAP0(vcvt_f16_u16),
7254 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7255 NEONMAP0(vcvt_f32_v),
7256 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7257 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7258 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7259 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7260 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7261 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7262 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7263 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7264 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7265 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7266 NEONMAP0(vcvtq_f16_s16),
7267 NEONMAP0(vcvtq_f16_u16),
7268 NEONMAP0(vcvtq_f32_v),
7269 NEONMAP0(vcvtq_high_bf16_f32),
7270 NEONMAP0(vcvtq_low_bf16_f32),
7271 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7272 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7273 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7274 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7275 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7276 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7277 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7278 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7279 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7280 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7281 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7282 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7283 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7284 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7285 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7286 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7287 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7288 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7289 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7290 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7291 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7292 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7293 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7294 NEONMAP0(vext_v),
7295 NEONMAP0(vextq_v),
7296 NEONMAP0(vfma_v),
7297 NEONMAP0(vfmaq_v),
7298 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7299 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7300 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7301 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7302 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7303 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7304 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7305 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7306 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7307 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7308 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7309 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7310 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7311 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7312 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7313 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7314 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7315 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7316 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7317 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7318 NEONMAP0(vmovl_v),
7319 NEONMAP0(vmovn_v),
7320 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7321 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7322 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7323 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7324 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7325 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7326 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7327 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7328 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7329 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7330 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7331 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7332 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7333 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7334 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7335 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7336 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7337 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7338 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7339 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7340 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7341 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7342 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7343 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7344 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7345 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7346 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7347 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7348 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7349 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7350 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7351 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7352 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7353 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7354 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7355 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7356 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7357 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7358 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7359 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7360 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7361 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
7362 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7363 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7364 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7365 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7366 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7367 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7368 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7369 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7370 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7371 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7372 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7373 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7374 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7375 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7376 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7377 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7378 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7379 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7380 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7381 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7382 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7383 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7384 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7385 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7386 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7387 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7388 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7389 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7390 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7391 NEONMAP0(vrndi_v),
7392 NEONMAP0(vrndiq_v),
7393 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7394 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7395 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7396 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7397 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7398 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7399 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7400 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7401 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7402 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7403 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7404 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7405 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7406 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7407 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7408 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7409 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7410 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7411 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7412 NEONMAP0(vshl_n_v),
7413 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7414 NEONMAP0(vshll_n_v),
7415 NEONMAP0(vshlq_n_v),
7416 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7417 NEONMAP0(vshr_n_v),
7418 NEONMAP0(vshrn_n_v),
7419 NEONMAP0(vshrq_n_v),
7420 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7421 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7422 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7423 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7424 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7425 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7426 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7427 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7428 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7429 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7430 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7431 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7432 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7433 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7434 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7435 NEONMAP0(vsubhn_v),
7436 NEONMAP0(vtst_v),
7437 NEONMAP0(vtstq_v),
7438 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7439 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7440 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7441 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7442};
7443
// Table of AArch64 scalar (SISD) NEON builtin descriptors.
//
// Every row is produced by a NEONMAP* macro (defined outside this table):
//   - NEONMAP0(name)                  : builtin name only, no intrinsic listed.
//   - NEONMAP1(name, intrinsic, flags): builtin name, one LLVM intrinsic name,
//                                       and a flag expression.
// The flag expression is either 0 or an OR of modifiers such as AddRetType,
// Add1ArgType, Vectorize1ArgType, VectorRet, Use64BitVectors and
// Use128BitVectors. Their exact meaning is defined where the flags are
// declared, not here.
//
// NOTE(review): rows are kept in roughly alphabetical order by builtin name,
// in two groups (general entries first, then the FP16 scalar entries).
// Presumably a lookup elsewhere relies on a specific ordering -- confirm
// before inserting or reordering rows.
7444static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7445 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7446 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7447 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7448 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7449 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7450 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7451 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7452 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7453 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7454 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7455 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7456 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7457 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7458 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7459 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7460 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7461 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7462 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7463 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7464 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
// NOTE(review): vcale*/vcalt* below list the same facge/facgt intrinsics as
// vcage*/vcagt* above; presumably the consumer of this table swaps the
// operands for the "less than" forms -- confirm against the emitter.
7465 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7466 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7467 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7468 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7469 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7470 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7471 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7472 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7473 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7474 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7475 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7476 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7477 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7478 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7479 NEONMAP0(vcvth_bf16_f32),
7480 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7481 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7482 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7483 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7484 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7485 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7486 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7487 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7488 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7489 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7490 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7491 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7492 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7493 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7494 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7495 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7496 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7497 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7498 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7499 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7500 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7501 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7502 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7503 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7504 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7505 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7506 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7507 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7508 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7509 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7510 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7511 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7512 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7513 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7514 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7515 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7516 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7517 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7518 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7519 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7520 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7521 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
// NOTE(review): the signed vpaddd_s64 lists uaddv, the same intrinsic as
// vpaddd_u64 -- presumably intentional because integer addition does not
// depend on signedness; confirm.
7522 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7523 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7524 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7525 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7526 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7527 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7528 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7529 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7530 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7531 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7532 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7533 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7534 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7535 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7536 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7537 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7538 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7539 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7540 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7541 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7542 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7543 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7544 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7545 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7546 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7547 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7548 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7549 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7550 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7551 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7552 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7553 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7554 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7555 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7556 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7557 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7558 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7559 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7560 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7561 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7562 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7563 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7564 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7565 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7566 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7567 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7568 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7569 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7570 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7571 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7572 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7573 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7574 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7575 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7576 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7577 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7578 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7579 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7580 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7581 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7582 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7583 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7584 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7585 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7586 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7587 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7588 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7589 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7590 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7591 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7592 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7593 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7594 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7595 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7596 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7597 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7598 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7599 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7600 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7601 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7602 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7603 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7604 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7605 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7606 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7607 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7608 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7609 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7610 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7611 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7612 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7613 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7614 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7615 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7616 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7617 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7618 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7619 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7620 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7621 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7622 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7623 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7624 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7625 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7626 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7627 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7628 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7629 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7630 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7631 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7632 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7633 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7634 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7635 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7636 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7637 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7638 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7639 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7640 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7641 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7642 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7643 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7644 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7645 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7646 // FP16 scalar intrinsics go here.
7647 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7648 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7649 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7650 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7651 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7652 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7653 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7654 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7655 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7656 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7657 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7658 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7659 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7660 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7661 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7662 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7663 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7664 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7665 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7666 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7667 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7668 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7669 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7670 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7671 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7672 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7673 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7674 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7675 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7676 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7677 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7678 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7679 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7680 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7681};
7682
7683// Some intrinsics are equivalent for codegen.
7684static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7685 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7686 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7687 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7688 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7689 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7690 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7691 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7692 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7693 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7694 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7695 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7696 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7697 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7698 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7699 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7700 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7701 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7702 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7703 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7704 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7705 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7706 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7707 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7708 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7709 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7710 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7711 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7712 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7713 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7714 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7715 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7716 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7717 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7718 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7719 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7720 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7721 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7722 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7723 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7724 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7725 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7726 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7727 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7728 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7729 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7730 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7731 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7732 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7733 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7734 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7735 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7736 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7737 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7738 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7739 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7740 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7741 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7742 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7743 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7744 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7745 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7746 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7747 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7748 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7749 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7750 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7751 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7752 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7753 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7754 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7755 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7756 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7757 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7758 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7759 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7760 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7761 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7762 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7763 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7764 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7765 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7766 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7767 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7768 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7769 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7770 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7771 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7772 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7773 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7774 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7775 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7776 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7777 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7778 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7779 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7780 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7781 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7782 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7783 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7784 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7785 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7786 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7787 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7788 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7789 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7790 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7791 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7792 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7793 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7794 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7795 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7796 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7797 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7798 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7799 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7800 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7801 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7802 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7803 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7804 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7805 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7806 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7807 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7808 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7809 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7810 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7811 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7812 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7813 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7814 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7815 // arbitrary one to be handled as the canonical variation.
7816 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7817 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7818 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7819 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7820 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7821 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7822 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7823 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7824 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7825 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7826 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7827 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7828};
7829
// Retire the NEONMAP0/1/2 helper macros here so they cannot leak into the
// rest of the translation unit.
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2
7833
7834#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7835 { \
7836 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7837 TypeModifier \
7838 }
7839
7840#define SVEMAP2(NameBase, TypeModifier) \
7841 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7842static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7843#define GET_SVE_LLVM_INTRINSIC_MAP
7844#include "clang/Basic/arm_sve_builtin_cg.inc"
7845#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7846#undef GET_SVE_LLVM_INTRINSIC_MAP
7847};
7848
7849#undef SVEMAP1
7850#undef SVEMAP2
7851
7852#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7853 { \
7854 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7855 TypeModifier \
7856 }
7857
7858#define SMEMAP2(NameBase, TypeModifier) \
7859 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7860static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7861#define GET_SME_LLVM_INTRINSIC_MAP
7862#include "clang/Basic/arm_sme_builtin_cg.inc"
7863#undef GET_SME_LLVM_INTRINSIC_MAP
7864};
7865
7866#undef SMEMAP1
7867#undef SMEMAP2
7868
7870
7875
7876static const ARMVectorIntrinsicInfo *
7878 unsigned BuiltinID, bool &MapProvenSorted) {
7879
7880#ifndef NDEBUG
7881 if (!MapProvenSorted) {
7882 assert(llvm::is_sorted(IntrinsicMap));
7883 MapProvenSorted = true;
7884 }
7885#endif
7886
7887 const ARMVectorIntrinsicInfo *Builtin =
7888 llvm::lower_bound(IntrinsicMap, BuiltinID);
7889
7890 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7891 return Builtin;
7892
7893 return nullptr;
7894}
7895
7897 unsigned Modifier,
7898 llvm::Type *ArgType,
7899 const CallExpr *E) {
7900 int VectorSize = 0;
7901 if (Modifier & Use64BitVectors)
7902 VectorSize = 64;
7903 else if (Modifier & Use128BitVectors)
7904 VectorSize = 128;
7905
7906 // Return type.
7908 if (Modifier & AddRetType) {
7909 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7910 if (Modifier & VectorizeRetType)
7911 Ty = llvm::FixedVectorType::get(
7912 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7913
7914 Tys.push_back(Ty);
7915 }
7916
7917 // Arguments.
7918 if (Modifier & VectorizeArgTypes) {
7919 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7920 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7921 }
7922
7923 if (Modifier & (Add1ArgType | Add2ArgTypes))
7924 Tys.push_back(ArgType);
7925
7926 if (Modifier & Add2ArgTypes)
7927 Tys.push_back(ArgType);
7928
7929 if (Modifier & InventFloatType)
7930 Tys.push_back(FloatTy);
7931
7932 return CGM.getIntrinsic(IntrinsicID, Tys);
7933}
7934
7936 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7937 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7938 unsigned BuiltinID = SISDInfo.BuiltinID;
7939 unsigned int Int = SISDInfo.LLVMIntrinsic;
7940 unsigned Modifier = SISDInfo.TypeModifier;
7941 const char *s = SISDInfo.NameHint;
7942
7943 switch (BuiltinID) {
7944 case NEON::BI__builtin_neon_vcled_s64:
7945 case NEON::BI__builtin_neon_vcled_u64:
7946 case NEON::BI__builtin_neon_vcles_f32:
7947 case NEON::BI__builtin_neon_vcled_f64:
7948 case NEON::BI__builtin_neon_vcltd_s64:
7949 case NEON::BI__builtin_neon_vcltd_u64:
7950 case NEON::BI__builtin_neon_vclts_f32:
7951 case NEON::BI__builtin_neon_vcltd_f64:
7952 case NEON::BI__builtin_neon_vcales_f32:
7953 case NEON::BI__builtin_neon_vcaled_f64:
7954 case NEON::BI__builtin_neon_vcalts_f32:
7955 case NEON::BI__builtin_neon_vcaltd_f64:
7956 // Only one direction of comparisons actually exist, cmle is actually a cmge
7957 // with swapped operands. The table gives us the right intrinsic but we
7958 // still need to do the swap.
7959 std::swap(Ops[0], Ops[1]);
7960 break;
7961 }
7962
7963 assert(Int && "Generic code assumes a valid intrinsic");
7964
7965 // Determine the type(s) of this overloaded AArch64 intrinsic.
7966 const Expr *Arg = E->getArg(0);
7967 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7968 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7969
7970 int j = 0;
7971 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7972 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7973 ai != ae; ++ai, ++j) {
7974 llvm::Type *ArgTy = ai->getType();
7975 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7976 ArgTy->getPrimitiveSizeInBits())
7977 continue;
7978
7979 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7980 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7981 // it before inserting.
7982 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7983 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7984 Ops[j] =
7985 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7986 }
7987
7988 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7989 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7990 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7991 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7992 return CGF.Builder.CreateExtractElement(Result, C0);
7993
7994 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7995}
7996
7998 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7999 const char *NameHint, unsigned Modifier, const CallExpr *E,
8000 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8001 llvm::Triple::ArchType Arch) {
8002 // Get the last argument, which specifies the vector type.
8003 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8004 std::optional<llvm::APSInt> NeonTypeConst =
8006 if (!NeonTypeConst)
8007 return nullptr;
8008
8009 // Determine the type of this overloaded NEON intrinsic.
8010 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8011 bool Usgn = Type.isUnsigned();
8012 bool Quad = Type.isQuad();
8013 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8014 const bool AllowBFloatArgsAndRet =
8015 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8016
8017 llvm::FixedVectorType *VTy =
8018 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8019 llvm::Type *Ty = VTy;
8020 if (!Ty)
8021 return nullptr;
8022
8023 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8024 return Builder.getInt32(addr.getAlignment().getQuantity());
8025 };
8026
8027 unsigned Int = LLVMIntrinsic;
8028 if ((Modifier & UnsignedAlts) && !Usgn)
8029 Int = AltLLVMIntrinsic;
8030
8031 switch (BuiltinID) {
8032 default: break;
8033 case NEON::BI__builtin_neon_splat_lane_v:
8034 case NEON::BI__builtin_neon_splat_laneq_v:
8035 case NEON::BI__builtin_neon_splatq_lane_v:
8036 case NEON::BI__builtin_neon_splatq_laneq_v: {
8037 auto NumElements = VTy->getElementCount();
8038 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8039 NumElements = NumElements * 2;
8040 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8041 NumElements = NumElements.divideCoefficientBy(2);
8042
8043 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8044 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8045 }
8046 case NEON::BI__builtin_neon_vpadd_v:
8047 case NEON::BI__builtin_neon_vpaddq_v:
8048 // We don't allow fp/int overloading of intrinsics.
8049 if (VTy->getElementType()->isFloatingPointTy() &&
8050 Int == Intrinsic::aarch64_neon_addp)
8051 Int = Intrinsic::aarch64_neon_faddp;
8052 break;
8053 case NEON::BI__builtin_neon_vabs_v:
8054 case NEON::BI__builtin_neon_vabsq_v:
8055 if (VTy->getElementType()->isFloatingPointTy())
8056 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8057 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8058 case NEON::BI__builtin_neon_vadd_v:
8059 case NEON::BI__builtin_neon_vaddq_v: {
8060 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8061 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8062 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8063 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8064 return Builder.CreateBitCast(Ops[0], Ty);
8065 }
8066 case NEON::BI__builtin_neon_vaddhn_v: {
8067 llvm::FixedVectorType *SrcTy =
8068 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8069
8070 // %sum = add <4 x i32> %lhs, %rhs
8071 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8072 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8073 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8074
8075 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8076 Constant *ShiftAmt =
8077 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8078 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8079
8080 // %res = trunc <4 x i32> %high to <4 x i16>
8081 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8082 }
8083 case NEON::BI__builtin_neon_vcale_v:
8084 case NEON::BI__builtin_neon_vcaleq_v:
8085 case NEON::BI__builtin_neon_vcalt_v:
8086 case NEON::BI__builtin_neon_vcaltq_v:
8087 std::swap(Ops[0], Ops[1]);
8088 [[fallthrough]];
8089 case NEON::BI__builtin_neon_vcage_v:
8090 case NEON::BI__builtin_neon_vcageq_v:
8091 case NEON::BI__builtin_neon_vcagt_v:
8092 case NEON::BI__builtin_neon_vcagtq_v: {
8093 llvm::Type *Ty;
8094 switch (VTy->getScalarSizeInBits()) {
8095 default: llvm_unreachable("unexpected type");
8096 case 32:
8097 Ty = FloatTy;
8098 break;
8099 case 64:
8100 Ty = DoubleTy;
8101 break;
8102 case 16:
8103 Ty = HalfTy;
8104 break;
8105 }
8106 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8107 llvm::Type *Tys[] = { VTy, VecFlt };
8108 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8109 return EmitNeonCall(F, Ops, NameHint);
8110 }
8111 case NEON::BI__builtin_neon_vceqz_v:
8112 case NEON::BI__builtin_neon_vceqzq_v:
8113 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8114 ICmpInst::ICMP_EQ, "vceqz");
8115 case NEON::BI__builtin_neon_vcgez_v:
8116 case NEON::BI__builtin_neon_vcgezq_v:
8117 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8118 ICmpInst::ICMP_SGE, "vcgez");
8119 case NEON::BI__builtin_neon_vclez_v:
8120 case NEON::BI__builtin_neon_vclezq_v:
8121 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8122 ICmpInst::ICMP_SLE, "vclez");
8123 case NEON::BI__builtin_neon_vcgtz_v:
8124 case NEON::BI__builtin_neon_vcgtzq_v:
8125 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8126 ICmpInst::ICMP_SGT, "vcgtz");
8127 case NEON::BI__builtin_neon_vcltz_v:
8128 case NEON::BI__builtin_neon_vcltzq_v:
8129 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8130 ICmpInst::ICMP_SLT, "vcltz");
8131 case NEON::BI__builtin_neon_vclz_v:
8132 case NEON::BI__builtin_neon_vclzq_v:
8133 // We generate target-independent intrinsic, which needs a second argument
8134 // for whether or not clz of zero is undefined; on ARM it isn't.
8135 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8136 break;
8137 case NEON::BI__builtin_neon_vcvt_f32_v:
8138 case NEON::BI__builtin_neon_vcvtq_f32_v:
8139 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8140 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8141 HasLegalHalfType);
8142 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8143 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8144 case NEON::BI__builtin_neon_vcvt_f16_s16:
8145 case NEON::BI__builtin_neon_vcvt_f16_u16:
8146 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8147 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8148 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8149 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8150 HasLegalHalfType);
8151 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8152 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8153 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8154 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8155 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8156 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8157 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8158 Function *F = CGM.getIntrinsic(Int, Tys);
8159 return EmitNeonCall(F, Ops, "vcvt_n");
8160 }
8161 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8162 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8163 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8164 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8165 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8166 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8167 Function *F = CGM.getIntrinsic(Int, Tys);
8168 return EmitNeonCall(F, Ops, "vcvt_n");
8169 }
8170 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8171 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8172 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8173 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8174 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8175 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8176 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8177 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8178 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8179 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8180 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8181 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8182 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8183 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8184 return EmitNeonCall(F, Ops, "vcvt_n");
8185 }
8186 case NEON::BI__builtin_neon_vcvt_s32_v:
8187 case NEON::BI__builtin_neon_vcvt_u32_v:
8188 case NEON::BI__builtin_neon_vcvt_s64_v:
8189 case NEON::BI__builtin_neon_vcvt_u64_v:
8190 case NEON::BI__builtin_neon_vcvt_s16_f16:
8191 case NEON::BI__builtin_neon_vcvt_u16_f16:
8192 case NEON::BI__builtin_neon_vcvtq_s32_v:
8193 case NEON::BI__builtin_neon_vcvtq_u32_v:
8194 case NEON::BI__builtin_neon_vcvtq_s64_v:
8195 case NEON::BI__builtin_neon_vcvtq_u64_v:
8196 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8197 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8198 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8199 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8200 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8201 }
8202 case NEON::BI__builtin_neon_vcvta_s16_f16:
8203 case NEON::BI__builtin_neon_vcvta_s32_v:
8204 case NEON::BI__builtin_neon_vcvta_s64_v:
8205 case NEON::BI__builtin_neon_vcvta_u16_f16:
8206 case NEON::BI__builtin_neon_vcvta_u32_v:
8207 case NEON::BI__builtin_neon_vcvta_u64_v:
8208 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8209 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8210 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8211 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8212 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8213 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8214 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8215 case NEON::BI__builtin_neon_vcvtn_s32_v:
8216 case NEON::BI__builtin_neon_vcvtn_s64_v:
8217 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8218 case NEON::BI__builtin_neon_vcvtn_u32_v:
8219 case NEON::BI__builtin_neon_vcvtn_u64_v:
8220 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8221 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8222 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8223 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8224 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8225 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8226 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8227 case NEON::BI__builtin_neon_vcvtp_s32_v:
8228 case NEON::BI__builtin_neon_vcvtp_s64_v:
8229 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8230 case NEON::BI__builtin_neon_vcvtp_u32_v:
8231 case NEON::BI__builtin_neon_vcvtp_u64_v:
8232 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8233 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8234 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8235 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8236 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8237 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8238 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8239 case NEON::BI__builtin_neon_vcvtm_s32_v:
8240 case NEON::BI__builtin_neon_vcvtm_s64_v:
8241 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8242 case NEON::BI__builtin_neon_vcvtm_u32_v:
8243 case NEON::BI__builtin_neon_vcvtm_u64_v:
8244 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8245 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8246 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8247 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8248 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8249 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8250 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8251 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8252 }
8253 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8254 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8255 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8256
8257 }
8258 case NEON::BI__builtin_neon_vext_v:
8259 case NEON::BI__builtin_neon_vextq_v: {
8260 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8261 SmallVector<int, 16> Indices;
8262 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8263 Indices.push_back(i+CV);
8264
8265 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8266 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8267 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8268 }
8269 case NEON::BI__builtin_neon_vfma_v:
8270 case NEON::BI__builtin_neon_vfmaq_v: {
8271 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8272 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8273 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8274
8275 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8277 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8278 {Ops[1], Ops[2], Ops[0]});
8279 }
8280 case NEON::BI__builtin_neon_vld1_v:
8281 case NEON::BI__builtin_neon_vld1q_v: {
8282 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8283 Ops.push_back(getAlignmentValue32(PtrOp0));
8284 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8285 }
8286 case NEON::BI__builtin_neon_vld1_x2_v:
8287 case NEON::BI__builtin_neon_vld1q_x2_v:
8288 case NEON::BI__builtin_neon_vld1_x3_v:
8289 case NEON::BI__builtin_neon_vld1q_x3_v:
8290 case NEON::BI__builtin_neon_vld1_x4_v:
8291 case NEON::BI__builtin_neon_vld1q_x4_v: {
8292 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8293 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8294 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8295 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8296 }
8297 case NEON::BI__builtin_neon_vld2_v:
8298 case NEON::BI__builtin_neon_vld2q_v:
8299 case NEON::BI__builtin_neon_vld3_v:
8300 case NEON::BI__builtin_neon_vld3q_v:
8301 case NEON::BI__builtin_neon_vld4_v:
8302 case NEON::BI__builtin_neon_vld4q_v:
8303 case NEON::BI__builtin_neon_vld2_dup_v:
8304 case NEON::BI__builtin_neon_vld2q_dup_v:
8305 case NEON::BI__builtin_neon_vld3_dup_v:
8306 case NEON::BI__builtin_neon_vld3q_dup_v:
8307 case NEON::BI__builtin_neon_vld4_dup_v:
8308 case NEON::BI__builtin_neon_vld4q_dup_v: {
8309 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8310 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8311 Value *Align = getAlignmentValue32(PtrOp1);
8312 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8313 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8314 }
8315 case NEON::BI__builtin_neon_vld1_dup_v:
8316 case NEON::BI__builtin_neon_vld1q_dup_v: {
8317 Value *V = PoisonValue::get(Ty);
8318 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8319 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8320 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8321 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8322 return EmitNeonSplat(Ops[0], CI);
8323 }
8324 case NEON::BI__builtin_neon_vld2_lane_v:
8325 case NEON::BI__builtin_neon_vld2q_lane_v:
8326 case NEON::BI__builtin_neon_vld3_lane_v:
8327 case NEON::BI__builtin_neon_vld3q_lane_v:
8328 case NEON::BI__builtin_neon_vld4_lane_v:
8329 case NEON::BI__builtin_neon_vld4q_lane_v: {
8330 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8331 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8332 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8333 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8334 Ops.push_back(getAlignmentValue32(PtrOp1));
8335 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8336 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8337 }
8338 case NEON::BI__builtin_neon_vmovl_v: {
8339 llvm::FixedVectorType *DTy =
8340 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8341 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8342 if (Usgn)
8343 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8344 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8345 }
8346 case NEON::BI__builtin_neon_vmovn_v: {
8347 llvm::FixedVectorType *QTy =
8348 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8349 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8350 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8351 }
8352 case NEON::BI__builtin_neon_vmull_v:
8353 // FIXME: the integer vmull operations could be emitted in terms of pure
8354 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8355 // hoisting the exts outside loops. Until global ISel comes along that can
8356 // see through such movement this leads to bad CodeGen. So we need an
8357 // intrinsic for now.
8358 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8359 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8360 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8361 case NEON::BI__builtin_neon_vpadal_v:
8362 case NEON::BI__builtin_neon_vpadalq_v: {
8363 // The source operand type has twice as many elements of half the size.
8364 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8365 llvm::Type *EltTy =
8366 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8367 auto *NarrowTy =
8368 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8369 llvm::Type *Tys[2] = { Ty, NarrowTy };
8370 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8371 }
8372 case NEON::BI__builtin_neon_vpaddl_v:
8373 case NEON::BI__builtin_neon_vpaddlq_v: {
8374 // The source operand type has twice as many elements of half the size.
8375 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8376 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8377 auto *NarrowTy =
8378 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8379 llvm::Type *Tys[2] = { Ty, NarrowTy };
8380 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8381 }
8382 case NEON::BI__builtin_neon_vqdmlal_v:
8383 case NEON::BI__builtin_neon_vqdmlsl_v: {
8384 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8385 Ops[1] =
8386 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8387 Ops.resize(2);
8388 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8389 }
8390 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8391 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8392 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8393 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8394 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8395 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8396 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8397 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8398 RTy->getNumElements() * 2);
8399 llvm::Type *Tys[2] = {
8400 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8401 /*isQuad*/ false))};
8402 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8403 }
8404 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8405 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8406 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8407 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8408 llvm::Type *Tys[2] = {
8409 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8410 /*isQuad*/ true))};
8411 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8412 }
8413 case NEON::BI__builtin_neon_vqshl_n_v:
8414 case NEON::BI__builtin_neon_vqshlq_n_v:
8415 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8416 1, false);
8417 case NEON::BI__builtin_neon_vqshlu_n_v:
8418 case NEON::BI__builtin_neon_vqshluq_n_v:
8419 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8420 1, false);
8421 case NEON::BI__builtin_neon_vrecpe_v:
8422 case NEON::BI__builtin_neon_vrecpeq_v:
8423 case NEON::BI__builtin_neon_vrsqrte_v:
8424 case NEON::BI__builtin_neon_vrsqrteq_v:
8425 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8426 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8427 case NEON::BI__builtin_neon_vrndi_v:
8428 case NEON::BI__builtin_neon_vrndiq_v:
8429 Int = Builder.getIsFPConstrained()
8430 ? Intrinsic::experimental_constrained_nearbyint
8431 : Intrinsic::nearbyint;
8432 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8433 case NEON::BI__builtin_neon_vrshr_n_v:
8434 case NEON::BI__builtin_neon_vrshrq_n_v:
8435 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8436 1, true);
8437 case NEON::BI__builtin_neon_vsha512hq_u64:
8438 case NEON::BI__builtin_neon_vsha512h2q_u64:
8439 case NEON::BI__builtin_neon_vsha512su0q_u64:
8440 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8441 Function *F = CGM.getIntrinsic(Int);
8442 return EmitNeonCall(F, Ops, "");
8443 }
8444 case NEON::BI__builtin_neon_vshl_n_v:
8445 case NEON::BI__builtin_neon_vshlq_n_v:
8446 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8447 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8448 "vshl_n");
8449 case NEON::BI__builtin_neon_vshll_n_v: {
8450 llvm::FixedVectorType *SrcTy =
8451 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8452 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8453 if (Usgn)
8454 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8455 else
8456 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8457 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8458 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8459 }
8460 case NEON::BI__builtin_neon_vshrn_n_v: {
8461 llvm::FixedVectorType *SrcTy =
8462 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8463 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8464 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8465 if (Usgn)
8466 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8467 else
8468 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8469 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8470 }
8471 case NEON::BI__builtin_neon_vshr_n_v:
8472 case NEON::BI__builtin_neon_vshrq_n_v:
8473 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8474 case NEON::BI__builtin_neon_vst1_v:
8475 case NEON::BI__builtin_neon_vst1q_v:
8476 case NEON::BI__builtin_neon_vst2_v:
8477 case NEON::BI__builtin_neon_vst2q_v:
8478 case NEON::BI__builtin_neon_vst3_v:
8479 case NEON::BI__builtin_neon_vst3q_v:
8480 case NEON::BI__builtin_neon_vst4_v:
8481 case NEON::BI__builtin_neon_vst4q_v:
8482 case NEON::BI__builtin_neon_vst2_lane_v:
8483 case NEON::BI__builtin_neon_vst2q_lane_v:
8484 case NEON::BI__builtin_neon_vst3_lane_v:
8485 case NEON::BI__builtin_neon_vst3q_lane_v:
8486 case NEON::BI__builtin_neon_vst4_lane_v:
8487 case NEON::BI__builtin_neon_vst4q_lane_v: {
8488 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8489 Ops.push_back(getAlignmentValue32(PtrOp0));
8490 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8491 }
8492 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8493 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8494 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8495 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8496 case NEON::BI__builtin_neon_vsm4eq_u32: {
8497 Function *F = CGM.getIntrinsic(Int);
8498 return EmitNeonCall(F, Ops, "");
8499 }
8500 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8501 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8502 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8503 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8504 Function *F = CGM.getIntrinsic(Int);
8505 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8506 return EmitNeonCall(F, Ops, "");
8507 }
8508 case NEON::BI__builtin_neon_vst1_x2_v:
8509 case NEON::BI__builtin_neon_vst1q_x2_v:
8510 case NEON::BI__builtin_neon_vst1_x3_v:
8511 case NEON::BI__builtin_neon_vst1q_x3_v:
8512 case NEON::BI__builtin_neon_vst1_x4_v:
8513 case NEON::BI__builtin_neon_vst1q_x4_v: {
8514 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8515 // in AArch64 it comes last. We may want to stick to one or another.
8516 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8517 Arch == llvm::Triple::aarch64_32) {
8518 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8519 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8520 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8521 }
8522 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8523 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8524 }
8525 case NEON::BI__builtin_neon_vsubhn_v: {
8526 llvm::FixedVectorType *SrcTy =
8527 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8528
8529 // %sum = sub <4 x i32> %lhs, %rhs   (a difference; name kept to match the comment below)
8530 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8531 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8532 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8533
8534 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8535 Constant *ShiftAmt =
8536 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8537 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8538
8539 // %res = trunc <4 x i32> %high to <4 x i16>
8540 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8541 }
8542 case NEON::BI__builtin_neon_vtrn_v:
8543 case NEON::BI__builtin_neon_vtrnq_v: {
8544 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8545 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8546 Value *SV = nullptr;
8547
8548 for (unsigned vi = 0; vi != 2; ++vi) {
8549 SmallVector<int, 16> Indices;
8550 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8551 Indices.push_back(i+vi);
8552 Indices.push_back(i+e+vi);
8553 }
8554 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8555 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8556 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8557 }
8558 return SV;
8559 }
8560 case NEON::BI__builtin_neon_vtst_v:
8561 case NEON::BI__builtin_neon_vtstq_v: {
8562 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8563 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8564 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8565 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8566 ConstantAggregateZero::get(Ty));
8567 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8568 }
8569 case NEON::BI__builtin_neon_vuzp_v:
8570 case NEON::BI__builtin_neon_vuzpq_v: {
8571 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8572 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8573 Value *SV = nullptr;
8574
8575 for (unsigned vi = 0; vi != 2; ++vi) {
8576 SmallVector<int, 16> Indices;
8577 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8578 Indices.push_back(2*i+vi);
8579
8580 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8581 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8582 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8583 }
8584 return SV;
8585 }
8586 case NEON::BI__builtin_neon_vxarq_u64: {
8587 Function *F = CGM.getIntrinsic(Int);
8588 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8589 return EmitNeonCall(F, Ops, "");
8590 }
8591 case NEON::BI__builtin_neon_vzip_v:
8592 case NEON::BI__builtin_neon_vzipq_v: {
8593 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8594 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8595 Value *SV = nullptr;
8596
8597 for (unsigned vi = 0; vi != 2; ++vi) {
8598 SmallVector<int, 16> Indices;
8599 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8600 Indices.push_back((i + vi*e) >> 1);
8601 Indices.push_back(((i + vi*e) >> 1)+e);
8602 }
8603 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8604 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8605 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8606 }
8607 return SV;
8608 }
8609 case NEON::BI__builtin_neon_vdot_s32:
8610 case NEON::BI__builtin_neon_vdot_u32:
8611 case NEON::BI__builtin_neon_vdotq_s32:
8612 case NEON::BI__builtin_neon_vdotq_u32: {
8613 auto *InputTy =
8614 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8615 llvm::Type *Tys[2] = { Ty, InputTy };
8616 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8617 }
8618 case NEON::BI__builtin_neon_vfmlal_low_f16:
8619 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8620 auto *InputTy =
8621 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8622 llvm::Type *Tys[2] = { Ty, InputTy };
8623 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8624 }
8625 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8626 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8627 auto *InputTy =
8628 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8629 llvm::Type *Tys[2] = { Ty, InputTy };
8630 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8631 }
8632 case NEON::BI__builtin_neon_vfmlal_high_f16:
8633 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8634 auto *InputTy =
8635 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8636 llvm::Type *Tys[2] = { Ty, InputTy };
8637 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8638 }
8639 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8640 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8641 auto *InputTy =
8642 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8643 llvm::Type *Tys[2] = { Ty, InputTy };
8644 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8645 }
8646 case NEON::BI__builtin_neon_vmmlaq_s32:
8647 case NEON::BI__builtin_neon_vmmlaq_u32: {
8648 auto *InputTy =
8649 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8650 llvm::Type *Tys[2] = { Ty, InputTy };
8651 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8652 }
8653 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8654 auto *InputTy =
8655 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8656 llvm::Type *Tys[2] = { Ty, InputTy };
8657 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8658 }
8659 case NEON::BI__builtin_neon_vusdot_s32:
8660 case NEON::BI__builtin_neon_vusdotq_s32: {
8661 auto *InputTy =
8662 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8663 llvm::Type *Tys[2] = { Ty, InputTy };
8664 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8665 }
8666 case NEON::BI__builtin_neon_vbfdot_f32:
8667 case NEON::BI__builtin_neon_vbfdotq_f32: {
8668 llvm::Type *InputTy =
8669 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8670 llvm::Type *Tys[2] = { Ty, InputTy };
8671 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8672 }
8673 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8674 llvm::Type *Tys[1] = { Ty };
8675 Function *F = CGM.getIntrinsic(Int, Tys);
8676 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8677 }
8678
8679 }
8680
8681 assert(Int && "Expected valid intrinsic number");
8682
8683 // Determine the type(s) of this overloaded AArch64 intrinsic.
8684 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8685
8686 Value *Result = EmitNeonCall(F, Ops, NameHint);
8687 llvm::Type *ResultType = ConvertType(E->getType());
8688 // AArch64 intrinsic one-element vector type cast to
8689 // scalar type expected by the builtin
8690 return Builder.CreateBitCast(Result, ResultType, NameHint);
8691}
8692
// Emits a comparison of Op against an all-zero value of the comparison type
// and sign-extends the compare result to Ty.
//   Op   - value to compare; it is bitcast to the comparison type chosen below.
//   Ty   - type of the sign-extended result that is returned.
//   Fp   - predicate used when the comparison type is floating point.
//   Ip   - predicate used otherwise (integer compare).
//   Name - name given to the final sign-extension instruction.
// NOTE(review): the line carrying the function name and return type is not
// present in this listing — confirm the declarator against the full source.
8694 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8695 const CmpInst::Predicate Ip, const Twine &Name) {
// Start from Op's current type; it may be replaced just below.
8696 llvm::Type *OTy = Op->getType();
8697
8698 // FIXME: this is utterly horrific. We should not be looking at previous
8699 // codegen context to find out what needs doing. Unfortunately TableGen
8700 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8701 // (etc).
// If Op was itself produced by a bitcast, compare in the type of that
// bitcast's source operand rather than in Op's current type.
8702 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8703 OTy = BI->getOperand(0)->getType();
8704
// Cast Op to the comparison type selected above.
8705 Op = Builder.CreateBitCast(Op, OTy);
8706 if (OTy->getScalarType()->isFloatingPointTy()) {
// FCMP_OEQ is emitted through CreateFCmp; every other floating-point
// predicate is emitted through CreateFCmpS.
8707 if (Fp == CmpInst::FCMP_OEQ)
8708 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8709 else
8710 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8711 } else {
// Non-floating-point comparison type: integer compare against zero.
8712 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8713 }
// Widen the compare result to Ty with a sign extension.
8714 return Builder.CreateSExt(Op, Ty, Name);
8715}
8716
// Packs the table vectors in Ops pairwise into wider vectors, then calls the
// intrinsic IntID (overloaded on ResTy) with: ExtOp (only if non-null), the
// packed tables, and finally IndexOp.
//   ExtOp   - optional leading operand; skipped when null.
//   IndexOp - operand appended after all packed tables.
//   ResTy   - type used to select the intrinsic overload.
//   IntID   - intrinsic ID to call.
//   Name    - name used for the shuffles and for the final call.
// NOTE(review): CGF, Ops and TblOps are used below, but the lines declaring
// them (including the function's declarator line) are not present in this
// listing — confirm against the full source.
8718 Value *ExtOp, Value *IndexOp,
8719 llvm::Type *ResTy, unsigned IntID,
8720 const char *Name) {
8722 if (ExtOp)
8723 TblOps.push_back(ExtOp);
8724
8725 // Build the shuffle mask 0, 1, ..., 2*e-1, where e is the element count of
// Ops[0]'s vector type (e.g. 0..15 for 8-element tables). Used as a mask over
// two such vectors, it concatenates them.
8726 SmallVector<int, 16> Indices;
8727 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8728 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8729 Indices.push_back(2*i);
8730 Indices.push_back(2*i+1);
8731 }
8732
// Concatenate Ops two at a time. End is the index of the last operand, so the
// loop stops with PairPos == End exactly when one unpaired operand remains.
8733 int PairPos = 0, End = Ops.size() - 1;
8734 while (PairPos < End) {
8735 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8736 Ops[PairPos+1], Indices,
8737 Name));
8738 PairPos += 2;
8739 }
8740
8741 // If there is an odd number of 64-bit lookup tables, fill the high 64 bits
8742 // of the last 128-bit lookup table with zero.
8743 if (PairPos == End) {
8744 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8745 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8746 ZeroTbl, Indices, Name));
8747 }
8748
// The index operand goes last, after all packed tables.
8749 Function *TblF;
8750 TblOps.push_back(IndexOp);
8751 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8752
8753 return CGF.EmitNeonCall(TblF, TblOps, Name);
8754}
8755
8756Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8757 unsigned Value;
8758 switch (BuiltinID) {
8759 default:
8760 return nullptr;
8761 case clang::ARM::BI__builtin_arm_nop:
8762 Value = 0;
8763 break;
8764 case clang::ARM::BI__builtin_arm_yield:
8765 case clang::ARM::BI__yield:
8766 Value = 1;
8767 break;
8768 case clang::ARM::BI__builtin_arm_wfe:
8769 case clang::ARM::BI__wfe:
8770 Value = 2;
8771 break;
8772 case clang::ARM::BI__builtin_arm_wfi:
8773 case clang::ARM::BI__wfi:
8774 Value = 3;
8775 break;
8776 case clang::ARM::BI__builtin_arm_sev:
8777 case clang::ARM::BI__sev:
8778 Value = 4;
8779 break;
8780 case clang::ARM::BI__builtin_arm_sevl:
8781 case clang::ARM::BI__sevl:
8782 Value = 5;
8783 break;
8784 }
8785
8786 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8787 llvm::ConstantInt::get(Int32Ty, Value));
8788}
8789
8794};
8795
8796// Generates the IR for __builtin_read_exec_*.
8797// Lowers the builtin to the amdgcn_ballot intrinsic, overloaded on
// RegisterType and called with a constant-true i1 argument.
//   RegisterType - overload type for the ballot intrinsic (its result type).
//   ValueType    - not referenced anywhere in the body below.
//   isExecHi     - when true, return bits [63:32] of the ballot result as
//                  CGF.Int32Ty; when false, return the ballot result as is.
// NOTE(review): CGF is used below, but the declarator line that introduces it
// is not present in this listing — confirm against the full source.
8799 llvm::Type *RegisterType,
8800 llvm::Type *ValueType, bool isExecHi) {
8801 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8802 CodeGen::CodeGenModule &CGM = CGF.CGM;
8803
8804 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8805 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8806
8807 if (isExecHi) {
// Upper half: shift the ballot result right by 32, then truncate to 32 bits.
8808 Value *Rt2 = Builder.CreateLShr(Call, 32);
8809 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8810 return Rt2;
8811 }
8812
8813 return Call;
8814}
8815
8816// Generates the IR for the read/write special register builtin,
8817// ValueType is the type of the value that is to be written or read,
8818// RegisterType is the type of the register being written to or read from.
//   E          - the builtin call; argument 0 supplies the register name when
//                SysReg is empty, argument 1 supplies the value for a write.
//   AccessKind - Write emits write_register; NormalRead / VolatileRead emit
//                read_register / read_volatile_register respectively.
//   SysReg     - register name; when empty it is taken from E's first
//                argument, which must then be a string literal.
// NOTE(review): CGF is used below, but the declarator line that introduces it
// is not present in this listing — confirm against the full source.
8820 const CallExpr *E,
8821 llvm::Type *RegisterType,
8822 llvm::Type *ValueType,
8823 SpecialRegisterAccessKind AccessKind,
8824 StringRef SysReg = "") {
8825 // The read and write register intrinsics used here are only emitted for
// 32, 64 and 128 bit integer register types.
8826 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8827 RegisterType->isIntegerTy(128)) &&
8828 "Unsupported size for register.");
8829
8830 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8831 CodeGen::CodeGenModule &CGM = CGF.CGM;
8832 LLVMContext &Context = CGM.getLLVMContext();
8833
// No explicit register name given: read it from the call's first argument.
8834 if (SysReg.empty()) {
8835 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8836 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8837 }
8838
// The intrinsics take the register name as a metadata string wrapped in an
// MDNode and passed as a value.
8839 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8840 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8841 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8842
// The intrinsics are overloaded on the register type only.
8843 llvm::Type *Types[] = { RegisterType };
8844
// MixedTypes: a 32-bit value accessed through a 64-bit register. The opposite
// combination (64-bit value, 32-bit register) is rejected by the assert.
8845 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8846 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8847 && "Can't fit 64-bit value in 32-bit register");
8848
// Read path.
8849 if (AccessKind != Write) {
8850 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8851 llvm::Function *F = CGM.getIntrinsic(
8852 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8853 : llvm::Intrinsic::read_register,
8854 Types);
8855 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8856
8857 if (MixedTypes)
8858 // Read into 64 bit register and then truncate result to 32 bit.
8859 return Builder.CreateTrunc(Call, ValueType);
8860
8861 if (ValueType->isPointerTy())
8862 // Have an integer result (Call) but want to return a pointer of type
// ValueType.
8863 return Builder.CreateIntToPtr(Call, ValueType);
8864
8865 return Call;
8866 }
8867
// Write path: the value to write is the call's second argument.
8868 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8869 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8870 if (MixedTypes) {
8871 // Zero-extend the 32 bit write value to 64 bit to pass to write.
8872 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8873 return Builder.CreateCall(F, { Metadata, ArgValue });
8874 }
8875
8876 if (ValueType->isPointerTy()) {
8877 // Have a pointer ArgValue but need to pass an integer of RegisterType.
8878 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8879 return Builder.CreateCall(F, { Metadata, ArgValue });
8880 }
8881
8882 return Builder.CreateCall(F, { Metadata, ArgValue });
8883}
8884
8885/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8886/// argument that specifies the vector type.
8887static bool HasExtraNeonArgument(unsigned BuiltinID) {
8888 switch (BuiltinID) {
8889 default: break;
8890 case NEON::BI__builtin_neon_vget_lane_i8:
8891 case NEON::BI__builtin_neon_vget_lane_i16:
8892 case NEON::BI__builtin_neon_vget_lane_bf16:
8893 case NEON::BI__builtin_neon_vget_lane_i32:
8894 case NEON::BI__builtin_neon_vget_lane_i64:
8895 case NEON::BI__builtin_neon_vget_lane_f32:
8896 case NEON::BI__builtin_neon_vgetq_lane_i8:
8897 case NEON::BI__builtin_neon_vgetq_lane_i16:
8898 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8899 case NEON::BI__builtin_neon_vgetq_lane_i32:
8900 case NEON::BI__builtin_neon_vgetq_lane_i64:
8901 case NEON::BI__builtin_neon_vgetq_lane_f32:
8902 case NEON::BI__builtin_neon_vduph_lane_bf16:
8903 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8904 case NEON::BI__builtin_neon_vset_lane_i8:
8905 case NEON::BI__builtin_neon_vset_lane_i16:
8906 case NEON::BI__builtin_neon_vset_lane_bf16:
8907 case NEON::BI__builtin_neon_vset_lane_i32:
8908 case NEON::BI__builtin_neon_vset_lane_i64:
8909 case NEON::BI__builtin_neon_vset_lane_f32:
8910 case NEON::BI__builtin_neon_vsetq_lane_i8:
8911 case NEON::BI__builtin_neon_vsetq_lane_i16:
8912 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8913 case NEON::BI__builtin_neon_vsetq_lane_i32:
8914 case NEON::BI__builtin_neon_vsetq_lane_i64:
8915 case NEON::BI__builtin_neon_vsetq_lane_f32:
8916 case NEON::BI__builtin_neon_vsha1h_u32:
8917 case NEON::BI__builtin_neon_vsha1cq_u32:
8918 case NEON::BI__builtin_neon_vsha1pq_u32:
8919 case NEON::BI__builtin_neon_vsha1mq_u32:
8920 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8921 case clang::ARM::BI_MoveToCoprocessor:
8922 case clang::ARM::BI_MoveToCoprocessor2:
8923 return false;
8924 }
8925 return true;
8926}
8927
8928Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8929 const CallExpr *E,
8930 ReturnValueSlot ReturnValue,
8931 llvm::Triple::ArchType Arch) {
8932 if (auto Hint = GetValueForARMHint(BuiltinID))
8933 return Hint;
8934
8935 if (BuiltinID == clang::ARM::BI__emit) {
8936 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8937 llvm::FunctionType *FTy =
8938 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8939
8941 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8942 llvm_unreachable("Sema will ensure that the parameter is constant");
8943
8944 llvm::APSInt Value = Result.Val.getInt();
8945 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8946
8947 llvm::InlineAsm *Emit =
8948 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8949 /*hasSideEffects=*/true)
8950 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8951 /*hasSideEffects=*/true);
8952
8953 return Builder.CreateCall(Emit);
8954 }
8955
8956 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8957 Value *Option = EmitScalarExpr(E->getArg(0));
8958 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8959 }
8960
8961 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8962 Value *Address = EmitScalarExpr(E->getArg(0));
8963 Value *RW = EmitScalarExpr(E->getArg(1));
8964 Value *IsData = EmitScalarExpr(E->getArg(2));
8965
8966 // Locality is not supported on ARM target
8967 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8968
8969 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8970 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8971 }
8972
8973 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8974 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8975 return Builder.CreateCall(
8976 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8977 }
8978
8979 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8980 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8981 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8982 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8983 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8984 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8985 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8986 return Res;
8987 }
8988
8989
8990 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8991 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8992 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8993 }
8994 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8995 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8996 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8997 "cls");
8998 }
8999
9000 if (BuiltinID == clang::ARM::BI__clear_cache) {
9001 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9002 const FunctionDecl *FD = E->getDirectCallee();
9003 Value *Ops[2];
9004 for (unsigned i = 0; i < 2; i++)
9005 Ops[i] = EmitScalarExpr(E->getArg(i));
9006 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9007 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9008 StringRef Name = FD->getName();
9009 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9010 }
9011
9012 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9013 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9014 Function *F;
9015
9016 switch (BuiltinID) {
9017 default: llvm_unreachable("unexpected builtin");
9018 case clang::ARM::BI__builtin_arm_mcrr:
9019 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9020 break;
9021 case clang::ARM::BI__builtin_arm_mcrr2:
9022 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9023 break;
9024 }
9025
9026 // MCRR{2} instruction has 5 operands but
9027 // the intrinsic has 4 because Rt and Rt2
9028 // are represented as a single unsigned 64
9029 // bit integer in the intrinsic definition
9030 // but internally it's represented as 2 32
9031 // bit integers.
9032
9033 Value *Coproc = EmitScalarExpr(E->getArg(0));
9034 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9035 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9036 Value *CRm = EmitScalarExpr(E->getArg(3));
9037
9038 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9039 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9040 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9041 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9042
9043 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9044 }
9045
9046 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9047 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9048 Function *F;
9049
9050 switch (BuiltinID) {
9051 default: llvm_unreachable("unexpected builtin");
9052 case clang::ARM::BI__builtin_arm_mrrc:
9053 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9054 break;
9055 case clang::ARM::BI__builtin_arm_mrrc2:
9056 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9057 break;
9058 }
9059
9060 Value *Coproc = EmitScalarExpr(E->getArg(0));
9061 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9062 Value *CRm = EmitScalarExpr(E->getArg(2));
9063 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9064
9065 // Returns an unsigned 64 bit integer, represented
9066 // as two 32 bit integers.
9067
9068 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9069 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9070 Rt = Builder.CreateZExt(Rt, Int64Ty);
9071 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9072
9073 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9074 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9075 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9076
9077 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9078 }
9079
9080 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9081 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9082 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9083 getContext().getTypeSize(E->getType()) == 64) ||
9084 BuiltinID == clang::ARM::BI__ldrexd) {
9085 Function *F;
9086
9087 switch (BuiltinID) {
9088 default: llvm_unreachable("unexpected builtin");
9089 case clang::ARM::BI__builtin_arm_ldaex:
9090 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9091 break;
9092 case clang::ARM::BI__builtin_arm_ldrexd:
9093 case clang::ARM::BI__builtin_arm_ldrex:
9094 case clang::ARM::BI__ldrexd:
9095 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9096 break;
9097 }
9098
9099 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9100 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9101
9102 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9103 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9104 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9105 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9106
9107 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9108 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9109 Val = Builder.CreateOr(Val, Val1);
9110 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9111 }
9112
9113 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9114 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9115 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9116
9117 QualType Ty = E->getType();
9118 llvm::Type *RealResTy = ConvertType(Ty);
9119 llvm::Type *IntTy =
9120 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9121
9123 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9124 : Intrinsic::arm_ldrex,
9125 UnqualPtrTy);
9126 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9127 Val->addParamAttr(
9128 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9129
9130 if (RealResTy->isPointerTy())
9131 return Builder.CreateIntToPtr(Val, RealResTy);
9132 else {
9133 llvm::Type *IntResTy = llvm::IntegerType::get(
9134 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9135 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9136 RealResTy);
9137 }
9138 }
9139
9140 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9141 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9142 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9143 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9145 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9146 : Intrinsic::arm_strexd);
9147 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9148
9149 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9150 Value *Val = EmitScalarExpr(E->getArg(0));
9151 Builder.CreateStore(Val, Tmp);
9152
9153 Address LdPtr = Tmp.withElementType(STy);
9154 Val = Builder.CreateLoad(LdPtr);
9155
9156 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9157 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9158 Value *StPtr = EmitScalarExpr(E->getArg(1));
9159 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9160 }
9161
9162 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9163 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9164 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9165 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9166
9167 QualType Ty = E->getArg(0)->getType();
9168 llvm::Type *StoreTy =
9169 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9170
9171 if (StoreVal->getType()->isPointerTy())
9172 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9173 else {
9174 llvm::Type *IntTy = llvm::IntegerType::get(
9176 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9177 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9178 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9179 }
9180
9182 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9183 : Intrinsic::arm_strex,
9184 StoreAddr->getType());
9185
9186 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9187 CI->addParamAttr(
9188 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9189 return CI;
9190 }
9191
9192 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9193 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9194 return Builder.CreateCall(F);
9195 }
9196
9197 // CRC32
9198 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9199 switch (BuiltinID) {
9200 case clang::ARM::BI__builtin_arm_crc32b:
9201 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9202 case clang::ARM::BI__builtin_arm_crc32cb:
9203 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9204 case clang::ARM::BI__builtin_arm_crc32h:
9205 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9206 case clang::ARM::BI__builtin_arm_crc32ch:
9207 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9208 case clang::ARM::BI__builtin_arm_crc32w:
9209 case clang::ARM::BI__builtin_arm_crc32d:
9210 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9211 case clang::ARM::BI__builtin_arm_crc32cw:
9212 case clang::ARM::BI__builtin_arm_crc32cd:
9213 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9214 }
9215
9216 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9217 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9218 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9219
9220 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9221 // intrinsics, hence we need different codegen for these cases.
9222 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9223 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9224 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9225 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9226 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9227 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9228
9229 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9230 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9231 return Builder.CreateCall(F, {Res, Arg1b});
9232 } else {
9233 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9234
9235 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9236 return Builder.CreateCall(F, {Arg0, Arg1});
9237 }
9238 }
9239
9240 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9241 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9242 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9243 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9244 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9245 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9246
9247 SpecialRegisterAccessKind AccessKind = Write;
9248 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9249 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9250 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9251 AccessKind = VolatileRead;
9252
9253 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9254 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9255
9256 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9257 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9258
9259 llvm::Type *ValueType;
9260 llvm::Type *RegisterType;
9261 if (IsPointerBuiltin) {
9262 ValueType = VoidPtrTy;
9264 } else if (Is64Bit) {
9265 ValueType = RegisterType = Int64Ty;
9266 } else {
9267 ValueType = RegisterType = Int32Ty;
9268 }
9269
9270 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9271 AccessKind);
9272 }
9273
9274 if (BuiltinID == ARM::BI__builtin_sponentry) {
9275 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9276 return Builder.CreateCall(F);
9277 }
9278
9279 // Handle MSVC intrinsics before argument evaluation to prevent double
9280 // evaluation.
9281 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9282 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9283
9284 // Deal with MVE builtins
9285 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9286 return Result;
9287 // Handle CDE builtins
9288 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9289 return Result;
9290
9291 // Some intrinsics are equivalent - if they are use the base intrinsic ID.
9292 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9293 return P.first == BuiltinID;
9294 });
9295 if (It != end(NEONEquivalentIntrinsicMap))
9296 BuiltinID = It->second;
9297
9298 // Find out if any arguments are required to be integer constant
9299 // expressions.
9300 unsigned ICEArguments = 0;
9302 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9303 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9304
9305 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9306 return Builder.getInt32(addr.getAlignment().getQuantity());
9307 };
9308
9309 Address PtrOp0 = Address::invalid();
9310 Address PtrOp1 = Address::invalid();
9312 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9313 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9314 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9315 if (i == 0) {
9316 switch (BuiltinID) {
9317 case NEON::BI__builtin_neon_vld1_v:
9318 case NEON::BI__builtin_neon_vld1q_v:
9319 case NEON::BI__builtin_neon_vld1q_lane_v:
9320 case NEON::BI__builtin_neon_vld1_lane_v:
9321 case NEON::BI__builtin_neon_vld1_dup_v:
9322 case NEON::BI__builtin_neon_vld1q_dup_v:
9323 case NEON::BI__builtin_neon_vst1_v:
9324 case NEON::BI__builtin_neon_vst1q_v:
9325 case NEON::BI__builtin_neon_vst1q_lane_v:
9326 case NEON::BI__builtin_neon_vst1_lane_v:
9327 case NEON::BI__builtin_neon_vst2_v:
9328 case NEON::BI__builtin_neon_vst2q_v:
9329 case NEON::BI__builtin_neon_vst2_lane_v:
9330 case NEON::BI__builtin_neon_vst2q_lane_v:
9331 case NEON::BI__builtin_neon_vst3_v:
9332 case NEON::BI__builtin_neon_vst3q_v:
9333 case NEON::BI__builtin_neon_vst3_lane_v:
9334 case NEON::BI__builtin_neon_vst3q_lane_v:
9335 case NEON::BI__builtin_neon_vst4_v:
9336 case NEON::BI__builtin_neon_vst4q_v:
9337 case NEON::BI__builtin_neon_vst4_lane_v:
9338 case NEON::BI__builtin_neon_vst4q_lane_v:
9339 // Get the alignment for the argument in addition to the value;
9340 // we'll use it later.
9341 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9342 Ops.push_back(PtrOp0.emitRawPointer(*this));
9343 continue;
9344 }
9345 }
9346 if (i == 1) {
9347 switch (BuiltinID) {
9348 case NEON::BI__builtin_neon_vld2_v:
9349 case NEON::BI__builtin_neon_vld2q_v:
9350 case NEON::BI__builtin_neon_vld3_v:
9351 case NEON::BI__builtin_neon_vld3q_v:
9352 case NEON::BI__builtin_neon_vld4_v:
9353 case NEON::BI__builtin_neon_vld4q_v:
9354 case NEON::BI__builtin_neon_vld2_lane_v:
9355 case NEON::BI__builtin_neon_vld2q_lane_v:
9356 case NEON::BI__builtin_neon_vld3_lane_v:
9357 case NEON::BI__builtin_neon_vld3q_lane_v:
9358 case NEON::BI__builtin_neon_vld4_lane_v:
9359 case NEON::BI__builtin_neon_vld4q_lane_v:
9360 case NEON::BI__builtin_neon_vld2_dup_v:
9361 case NEON::BI__builtin_neon_vld2q_dup_v:
9362 case NEON::BI__builtin_neon_vld3_dup_v:
9363 case NEON::BI__builtin_neon_vld3q_dup_v:
9364 case NEON::BI__builtin_neon_vld4_dup_v:
9365 case NEON::BI__builtin_neon_vld4q_dup_v:
9366 // Get the alignment for the argument in addition to the value;
9367 // we'll use it later.
9368 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9369 Ops.push_back(PtrOp1.emitRawPointer(*this));
9370 continue;
9371 }
9372 }
9373
9374 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9375 }
9376
9377 switch (BuiltinID) {
9378 default: break;
9379
9380 case NEON::BI__builtin_neon_vget_lane_i8:
9381 case NEON::BI__builtin_neon_vget_lane_i16:
9382 case NEON::BI__builtin_neon_vget_lane_i32:
9383 case NEON::BI__builtin_neon_vget_lane_i64:
9384 case NEON::BI__builtin_neon_vget_lane_bf16:
9385 case NEON::BI__builtin_neon_vget_lane_f32:
9386 case NEON::BI__builtin_neon_vgetq_lane_i8:
9387 case NEON::BI__builtin_neon_vgetq_lane_i16:
9388 case NEON::BI__builtin_neon_vgetq_lane_i32:
9389 case NEON::BI__builtin_neon_vgetq_lane_i64:
9390 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9391 case NEON::BI__builtin_neon_vgetq_lane_f32:
9392 case NEON::BI__builtin_neon_vduph_lane_bf16:
9393 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9394 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9395
9396 case NEON::BI__builtin_neon_vrndns_f32: {
9397 Value *Arg = EmitScalarExpr(E->getArg(0));
9398 llvm::Type *Tys[] = {Arg->getType()};
9399 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9400 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9401
9402 case NEON::BI__builtin_neon_vset_lane_i8:
9403 case NEON::BI__builtin_neon_vset_lane_i16:
9404 case NEON::BI__builtin_neon_vset_lane_i32:
9405 case NEON::BI__builtin_neon_vset_lane_i64:
9406 case NEON::BI__builtin_neon_vset_lane_bf16:
9407 case NEON::BI__builtin_neon_vset_lane_f32:
9408 case NEON::BI__builtin_neon_vsetq_lane_i8:
9409 case NEON::BI__builtin_neon_vsetq_lane_i16:
9410 case NEON::BI__builtin_neon_vsetq_lane_i32:
9411 case NEON::BI__builtin_neon_vsetq_lane_i64:
9412 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9413 case NEON::BI__builtin_neon_vsetq_lane_f32:
9414 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9415
9416 case NEON::BI__builtin_neon_vsha1h_u32:
9417 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9418 "vsha1h");
9419 case NEON::BI__builtin_neon_vsha1cq_u32:
9420 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9421 "vsha1h");
9422 case NEON::BI__builtin_neon_vsha1pq_u32:
9423 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9424 "vsha1h");
9425 case NEON::BI__builtin_neon_vsha1mq_u32:
9426 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9427 "vsha1h");
9428
9429 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9430 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9431 "vcvtbfp2bf");
9432 }
9433
9434 // The ARM _MoveToCoprocessor builtins put the input register value as
9435 // the first argument, but the LLVM intrinsic expects it as the third one.
9436 case clang::ARM::BI_MoveToCoprocessor:
9437 case clang::ARM::BI_MoveToCoprocessor2: {
9438 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9439 ? Intrinsic::arm_mcr
9440 : Intrinsic::arm_mcr2);
9441 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9442 Ops[3], Ops[4], Ops[5]});
9443 }
9444 }
9445
9446 // Get the last argument, which specifies the vector type.
9447 assert(HasExtraArg);
9448 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9449 std::optional<llvm::APSInt> Result =
9451 if (!Result)
9452 return nullptr;
9453
9454 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9455 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9456 // Determine the overloaded type of this builtin.
9457 llvm::Type *Ty;
9458 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9459 Ty = FloatTy;
9460 else
9461 Ty = DoubleTy;
9462
9463 // Determine whether this is an unsigned conversion or not.
9464 bool usgn = Result->getZExtValue() == 1;
9465 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9466
9467 // Call the appropriate intrinsic.
9468 Function *F = CGM.getIntrinsic(Int, Ty);
9469 return Builder.CreateCall(F, Ops, "vcvtr");
9470 }
9471
9472 // Determine the type of this overloaded NEON intrinsic.
9473 NeonTypeFlags Type = Result->getZExtValue();
9474 bool usgn = Type.isUnsigned();
9475 bool rightShift = false;
9476
9477 llvm::FixedVectorType *VTy =
9478 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9479 getTarget().hasBFloat16Type());
9480 llvm::Type *Ty = VTy;
9481 if (!Ty)
9482 return nullptr;
9483
9484 // Many NEON builtins have identical semantics and uses in ARM and
9485 // AArch64. Emit these in a single function.
9486 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9487 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9488 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9489 if (Builtin)
9491 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9492 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9493
9494 unsigned Int;
9495 switch (BuiltinID) {
9496 default: return nullptr;
9497 case NEON::BI__builtin_neon_vld1q_lane_v:
9498 // Handle 64-bit integer elements as a special case. Use shuffles of
9499 // one-element vectors to avoid poor code for i64 in the backend.
9500 if (VTy->getElementType()->isIntegerTy(64)) {
9501 // Extract the other lane.
9502 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9503 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9504 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9505 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9506 // Load the value as a one-element vector.
9507 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9508 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9509 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9510 Value *Align = getAlignmentValue32(PtrOp0);
9511 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9512 // Combine them.
9513 int Indices[] = {1 - Lane, Lane};
9514 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9515 }
9516 [[fallthrough]];
9517 case NEON::BI__builtin_neon_vld1_lane_v: {
9518 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9519 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9520 Value *Ld = Builder.CreateLoad(PtrOp0);
9521 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9522 }
9523 case NEON::BI__builtin_neon_vqrshrn_n_v:
9524 Int =
9525 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9526 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9527 1, true);
9528 case NEON::BI__builtin_neon_vqrshrun_n_v:
9529 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9530 Ops, "vqrshrun_n", 1, true);
9531 case NEON::BI__builtin_neon_vqshrn_n_v:
9532 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9533 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9534 1, true);
9535 case NEON::BI__builtin_neon_vqshrun_n_v:
9536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9537 Ops, "vqshrun_n", 1, true);
9538 case NEON::BI__builtin_neon_vrecpe_v:
9539 case NEON::BI__builtin_neon_vrecpeq_v:
9540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9541 Ops, "vrecpe");
9542 case NEON::BI__builtin_neon_vrshrn_n_v:
9543 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9544 Ops, "vrshrn_n", 1, true);
9545 case NEON::BI__builtin_neon_vrsra_n_v:
9546 case NEON::BI__builtin_neon_vrsraq_n_v:
9547 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9548 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9549 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9550 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9551 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9552 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9553 case NEON::BI__builtin_neon_vsri_n_v:
9554 case NEON::BI__builtin_neon_vsriq_n_v:
9555 rightShift = true;
9556 [[fallthrough]];
9557 case NEON::BI__builtin_neon_vsli_n_v:
9558 case NEON::BI__builtin_neon_vsliq_n_v:
9559 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9560 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9561 Ops, "vsli_n");
9562 case NEON::BI__builtin_neon_vsra_n_v:
9563 case NEON::BI__builtin_neon_vsraq_n_v:
9564 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9565 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9566 return Builder.CreateAdd(Ops[0], Ops[1]);
9567 case NEON::BI__builtin_neon_vst1q_lane_v:
9568 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9569 // a one-element vector and avoid poor code for i64 in the backend.
9570 if (VTy->getElementType()->isIntegerTy(64)) {
9571 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9572 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9573 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9574 Ops[2] = getAlignmentValue32(PtrOp0);
9575 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9576 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9577 Tys), Ops);
9578 }
9579 [[fallthrough]];
9580 case NEON::BI__builtin_neon_vst1_lane_v: {
9581 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9582 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9583 return Builder.CreateStore(Ops[1],
9584 PtrOp0.withElementType(Ops[1]->getType()));
9585 }
9586 case NEON::BI__builtin_neon_vtbl1_v:
9587 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9588 Ops, "vtbl1");
9589 case NEON::BI__builtin_neon_vtbl2_v:
9590 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9591 Ops, "vtbl2");
9592 case NEON::BI__builtin_neon_vtbl3_v:
9593 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9594 Ops, "vtbl3");
9595 case NEON::BI__builtin_neon_vtbl4_v:
9596 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9597 Ops, "vtbl4");
9598 case NEON::BI__builtin_neon_vtbx1_v:
9599 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9600 Ops, "vtbx1");
9601 case NEON::BI__builtin_neon_vtbx2_v:
9602 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9603 Ops, "vtbx2");
9604 case NEON::BI__builtin_neon_vtbx3_v:
9605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9606 Ops, "vtbx3");
9607 case NEON::BI__builtin_neon_vtbx4_v:
9608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9609 Ops, "vtbx4");
9610 }
9611}
9612
9613template<typename Integer>
9615 return E->getIntegerConstantExpr(Context)->getExtValue();
9616}
9617
9618static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9619 llvm::Type *T, bool Unsigned) {
9620 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9621 // which finds it convenient to specify signed/unsigned as a boolean flag.
9622 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9623}
9624
9625static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9626 uint32_t Shift, bool Unsigned) {
9627 // MVE helper function for integer shift right. This must handle signed vs
9628 // unsigned, and also deal specially with the case where the shift count is
9629 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9630 // undefined behavior, but in MVE it's legal, so we must convert it to code
9631 // that is not undefined in IR.
9632 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9633 ->getElementType()
9634 ->getPrimitiveSizeInBits();
9635 if (Shift == LaneBits) {
9636 // An unsigned shift of the full lane size always generates zero, so we can
9637 // simply emit a zero vector. A signed shift of the full lane size does the
9638 // same thing as shifting by one bit fewer.
9639 if (Unsigned)
9640 return llvm::Constant::getNullValue(V->getType());
9641 else
9642 --Shift;
9643 }
9644 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9645}
9646
9647static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9648 // MVE-specific helper function for a vector splat, which infers the element
9649 // count of the output vector by knowing that MVE vectors are all 128 bits
9650 // wide.
9651 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9652 return Builder.CreateVectorSplat(Elements, V);
9653}
9654
9655static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9656 CodeGenFunction *CGF,
9657 llvm::Value *V,
9658 llvm::Type *DestType) {
9659 // Convert one MVE vector type into another by reinterpreting its in-register
9660 // format.
9661 //
9662 // Little-endian, this is identical to a bitcast (which reinterprets the
9663 // memory format). But big-endian, they're not necessarily the same, because
9664 // the register and memory formats map to each other differently depending on
9665 // the lane size.
9666 //
9667 // We generate a bitcast whenever we can (if we're little-endian, or if the
9668 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9669 // that performs the different kind of reinterpretation.
9670 if (CGF->getTarget().isBigEndian() &&
9671 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9672 return Builder.CreateCall(
9673 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9674 {DestType, V->getType()}),
9675 V);
9676 } else {
9677 return Builder.CreateBitCast(V, DestType);
9678 }
9679}
9680
9681static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9682 // Make a shufflevector that extracts every other element of a vector (evens
9683 // or odds, as desired).
9684 SmallVector<int, 16> Indices;
9685 unsigned InputElements =
9686 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9687 for (unsigned i = 0; i < InputElements; i += 2)
9688 Indices.push_back(i + Odd);
9689 return Builder.CreateShuffleVector(V, Indices);
9690}
9691
9692static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9693 llvm::Value *V1) {
9694 // Make a shufflevector that interleaves two vectors element by element.
9695 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9696 SmallVector<int, 16> Indices;
9697 unsigned InputElements =
9698 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9699 for (unsigned i = 0; i < InputElements; i++) {
9700 Indices.push_back(i);
9701 Indices.push_back(i + InputElements);
9702 }
9703 return Builder.CreateShuffleVector(V0, V1, Indices);
9704}
9705
9706template<unsigned HighBit, unsigned OtherBits>
9707static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9708 // MVE-specific helper function to make a vector splat of a constant such as
9709 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9710 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9711 unsigned LaneBits = T->getPrimitiveSizeInBits();
9712 uint32_t Value = HighBit << (LaneBits - 1);
9713 if (OtherBits)
9714 Value |= (1UL << (LaneBits - 1)) - 1;
9715 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9716 return ARMMVEVectorSplat(Builder, Lane);
9717}
9718
9719static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9720 llvm::Value *V,
9721 unsigned ReverseWidth) {
9722 // MVE-specific helper function which reverses the elements of a
9723 // vector within every (ReverseWidth)-bit collection of lanes.
9724 SmallVector<int, 16> Indices;
9725 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9726 unsigned Elements = 128 / LaneSize;
9727 unsigned Mask = ReverseWidth / LaneSize - 1;
9728 for (unsigned i = 0; i < Elements; i++)
9729 Indices.push_back(i ^ Mask);
9730 return Builder.CreateShuffleVector(V, Indices);
9731}
9732
9734 const CallExpr *E,
9735 ReturnValueSlot ReturnValue,
9736 llvm::Triple::ArchType Arch) {
9737 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9738 Intrinsic::ID IRIntr;
9739 unsigned NumVectors;
9740
9741 // Code autogenerated by Tablegen will handle all the simple builtins.
9742 switch (BuiltinID) {
9743 #include "clang/Basic/arm_mve_builtin_cg.inc"
9744
9745 // If we didn't match an MVE builtin id at all, go back to the
9746 // main EmitARMBuiltinExpr.
9747 default:
9748 return nullptr;
9749 }
9750
9751 // Anything that breaks from that switch is an MVE builtin that
9752 // needs handwritten code to generate.
9753
9754 switch (CustomCodeGenType) {
9755
9756 case CustomCodeGen::VLD24: {
9759
9760 auto MvecCType = E->getType();
9761 auto MvecLType = ConvertType(MvecCType);
9762 assert(MvecLType->isStructTy() &&
9763 "Return type for vld[24]q should be a struct");
9764 assert(MvecLType->getStructNumElements() == 1 &&
9765 "Return-type struct for vld[24]q should have one element");
9766 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9767 assert(MvecLTypeInner->isArrayTy() &&
9768 "Return-type struct for vld[24]q should contain an array");
9769 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9770 "Array member of return-type struct vld[24]q has wrong length");
9771 auto VecLType = MvecLTypeInner->getArrayElementType();
9772
9773 Tys.push_back(VecLType);
9774
9775 auto Addr = E->getArg(0);
9776 Ops.push_back(EmitScalarExpr(Addr));
9777 Tys.push_back(ConvertType(Addr->getType()));
9778
9779 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9780 Value *LoadResult = Builder.CreateCall(F, Ops);
9781 Value *MvecOut = PoisonValue::get(MvecLType);
9782 for (unsigned i = 0; i < NumVectors; ++i) {
9783 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9784 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9785 }
9786
9787 if (ReturnValue.isNull())
9788 return MvecOut;
9789 else
9790 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9791 }
9792
9793 case CustomCodeGen::VST24: {
9796
9797 auto Addr = E->getArg(0);
9798 Ops.push_back(EmitScalarExpr(Addr));
9799 Tys.push_back(ConvertType(Addr->getType()));
9800
9801 auto MvecCType = E->getArg(1)->getType();
9802 auto MvecLType = ConvertType(MvecCType);
9803 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9804 assert(MvecLType->getStructNumElements() == 1 &&
9805 "Data-type struct for vst2q should have one element");
9806 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9807 assert(MvecLTypeInner->isArrayTy() &&
9808 "Data-type struct for vst2q should contain an array");
9809 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9810 "Array member of return-type struct vld[24]q has wrong length");
9811 auto VecLType = MvecLTypeInner->getArrayElementType();
9812
9813 Tys.push_back(VecLType);
9814
9815 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9816 EmitAggExpr(E->getArg(1), MvecSlot);
9817 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9818 for (unsigned i = 0; i < NumVectors; i++)
9819 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9820
9821 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9822 Value *ToReturn = nullptr;
9823 for (unsigned i = 0; i < NumVectors; i++) {
9824 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9825 ToReturn = Builder.CreateCall(F, Ops);
9826 Ops.pop_back();
9827 }
9828 return ToReturn;
9829 }
9830 }
9831 llvm_unreachable("unknown custom codegen type.");
9832}
9833
9835 const CallExpr *E,
9836 ReturnValueSlot ReturnValue,
9837 llvm::Triple::ArchType Arch) {
9838 switch (BuiltinID) {
9839 default:
9840 return nullptr;
9841#include "clang/Basic/arm_cde_builtin_cg.inc"
9842 }
9843}
9844
9845static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9846 const CallExpr *E,
9848 llvm::Triple::ArchType Arch) {
9849 unsigned int Int = 0;
9850 const char *s = nullptr;
9851
9852 switch (BuiltinID) {
9853 default:
9854 return nullptr;
9855 case NEON::BI__builtin_neon_vtbl1_v:
9856 case NEON::BI__builtin_neon_vqtbl1_v:
9857 case NEON::BI__builtin_neon_vqtbl1q_v:
9858 case NEON::BI__builtin_neon_vtbl2_v:
9859 case NEON::BI__builtin_neon_vqtbl2_v:
9860 case NEON::BI__builtin_neon_vqtbl2q_v:
9861 case NEON::BI__builtin_neon_vtbl3_v:
9862 case NEON::BI__builtin_neon_vqtbl3_v:
9863 case NEON::BI__builtin_neon_vqtbl3q_v:
9864 case NEON::BI__builtin_neon_vtbl4_v:
9865 case NEON::BI__builtin_neon_vqtbl4_v:
9866 case NEON::BI__builtin_neon_vqtbl4q_v:
9867 break;
9868 case NEON::BI__builtin_neon_vtbx1_v:
9869 case NEON::BI__builtin_neon_vqtbx1_v:
9870 case NEON::BI__builtin_neon_vqtbx1q_v:
9871 case NEON::BI__builtin_neon_vtbx2_v:
9872 case NEON::BI__builtin_neon_vqtbx2_v:
9873 case NEON::BI__builtin_neon_vqtbx2q_v:
9874 case NEON::BI__builtin_neon_vtbx3_v:
9875 case NEON::BI__builtin_neon_vqtbx3_v:
9876 case NEON::BI__builtin_neon_vqtbx3q_v:
9877 case NEON::BI__builtin_neon_vtbx4_v:
9878 case NEON::BI__builtin_neon_vqtbx4_v:
9879 case NEON::BI__builtin_neon_vqtbx4q_v:
9880 break;
9881 }
9882
9883 assert(E->getNumArgs() >= 3);
9884
9885 // Get the last argument, which specifies the vector type.
9886 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9887 std::optional<llvm::APSInt> Result =
9889 if (!Result)
9890 return nullptr;
9891
9892 // Determine the type of this overloaded NEON intrinsic.
9893 NeonTypeFlags Type = Result->getZExtValue();
9894 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9895 if (!Ty)
9896 return nullptr;
9897
9898 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9899
9900 // AArch64 scalar builtins are not overloaded, they do not have an extra
9901 // argument that specifies the vector type, need to handle each case.
9902 switch (BuiltinID) {
9903 case NEON::BI__builtin_neon_vtbl1_v: {
9904 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9905 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9906 }
9907 case NEON::BI__builtin_neon_vtbl2_v: {
9908 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9909 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9910 }
9911 case NEON::BI__builtin_neon_vtbl3_v: {
9912 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9913 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9914 }
9915 case NEON::BI__builtin_neon_vtbl4_v: {
9916 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9917 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9918 }
9919 case NEON::BI__builtin_neon_vtbx1_v: {
9920 Value *TblRes =
9921 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9922 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9923
9924 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9925 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9926 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9927
9928 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9929 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9930 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9931 }
9932 case NEON::BI__builtin_neon_vtbx2_v: {
9933 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9934 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9935 }
9936 case NEON::BI__builtin_neon_vtbx3_v: {
9937 Value *TblRes =
9938 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9939 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9940
9941 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9942 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9943 TwentyFourV);
9944 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9945
9946 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9947 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9948 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9949 }
9950 case NEON::BI__builtin_neon_vtbx4_v: {
9951 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9952 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9953 }
9954 case NEON::BI__builtin_neon_vqtbl1_v:
9955 case NEON::BI__builtin_neon_vqtbl1q_v:
9956 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9957 case NEON::BI__builtin_neon_vqtbl2_v:
9958 case NEON::BI__builtin_neon_vqtbl2q_v: {
9959 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9960 case NEON::BI__builtin_neon_vqtbl3_v:
9961 case NEON::BI__builtin_neon_vqtbl3q_v:
9962 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9963 case NEON::BI__builtin_neon_vqtbl4_v:
9964 case NEON::BI__builtin_neon_vqtbl4q_v:
9965 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9966 case NEON::BI__builtin_neon_vqtbx1_v:
9967 case NEON::BI__builtin_neon_vqtbx1q_v:
9968 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9969 case NEON::BI__builtin_neon_vqtbx2_v:
9970 case NEON::BI__builtin_neon_vqtbx2q_v:
9971 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9972 case NEON::BI__builtin_neon_vqtbx3_v:
9973 case NEON::BI__builtin_neon_vqtbx3q_v:
9974 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9975 case NEON::BI__builtin_neon_vqtbx4_v:
9976 case NEON::BI__builtin_neon_vqtbx4q_v:
9977 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9978 }
9979 }
9980
9981 if (!Int)
9982 return nullptr;
9983
9984 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9985 return CGF.EmitNeonCall(F, Ops, s);
9986}
9987
9989 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9990 Op = Builder.CreateBitCast(Op, Int16Ty);
9991 Value *V = PoisonValue::get(VTy);
9992 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9993 Op = Builder.CreateInsertElement(V, Op, CI);
9994 return Op;
9995}
9996
9997/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9998/// access builtin. Only required if it can't be inferred from the base pointer
9999/// operand.
10000llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10001 switch (TypeFlags.getMemEltType()) {
10002 case SVETypeFlags::MemEltTyDefault:
10003 return getEltType(TypeFlags);
10004 case SVETypeFlags::MemEltTyInt8:
10005 return Builder.getInt8Ty();
10006 case SVETypeFlags::MemEltTyInt16:
10007 return Builder.getInt16Ty();
10008 case SVETypeFlags::MemEltTyInt32:
10009 return Builder.getInt32Ty();
10010 case SVETypeFlags::MemEltTyInt64:
10011 return Builder.getInt64Ty();
10012 }
10013 llvm_unreachable("Unknown MemEltType");
10014}
10015
10016llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10017 switch (TypeFlags.getEltType()) {
10018 default:
10019 llvm_unreachable("Invalid SVETypeFlag!");
10020
10021 case SVETypeFlags::EltTyInt8:
10022 return Builder.getInt8Ty();
10023 case SVETypeFlags::EltTyInt16:
10024 return Builder.getInt16Ty();
10025 case SVETypeFlags::EltTyInt32:
10026 return Builder.getInt32Ty();
10027 case SVETypeFlags::EltTyInt64:
10028 return Builder.getInt64Ty();
10029 case SVETypeFlags::EltTyInt128:
10030 return Builder.getInt128Ty();
10031
10032 case SVETypeFlags::EltTyFloat16:
10033 return Builder.getHalfTy();
10034 case SVETypeFlags::EltTyFloat32:
10035 return Builder.getFloatTy();
10036 case SVETypeFlags::EltTyFloat64:
10037 return Builder.getDoubleTy();
10038
10039 case SVETypeFlags::EltTyBFloat16:
10040 return Builder.getBFloatTy();
10041
10042 case SVETypeFlags::EltTyBool8:
10043 case SVETypeFlags::EltTyBool16:
10044 case SVETypeFlags::EltTyBool32:
10045 case SVETypeFlags::EltTyBool64:
10046 return Builder.getInt1Ty();
10047 }
10048}
10049
10050// Return the llvm predicate vector type corresponding to the specified element
10051// TypeFlags.
// Every handled element kind yields a scalable vector of i1 whose minimum
// lane count is 16, 8, 4 or 2; any other element kind reaches
// llvm_unreachable.
// NOTE(review): the embedded numbering jumps from 10052 to 10054, so the
// declarator line (function name and parameters) is absent from this listing.
// NOTE(review): SVETypeFlags::EltTyMFloat8 has a case in getSVEType but none
// here, so it would hit the default — confirm whether that is intended.
10052llvm::ScalableVectorType *
10054 switch (TypeFlags.getEltType()) {
10055 default: llvm_unreachable("Unhandled SVETypeFlag!");
10056
10057 case SVETypeFlags::EltTyInt8:
10058 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10059 case SVETypeFlags::EltTyInt16:
10060 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10061 case SVETypeFlags::EltTyInt32:
10062 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10063 case SVETypeFlags::EltTyInt64:
10064 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10065
10066 case SVETypeFlags::EltTyBFloat16:
10067 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10068 case SVETypeFlags::EltTyFloat16:
10069 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10070 case SVETypeFlags::EltTyFloat32:
10071 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10072 case SVETypeFlags::EltTyFloat64:
10073 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10074
10075 case SVETypeFlags::EltTyBool8:
10076 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10077 case SVETypeFlags::EltTyBool16:
10078 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10079 case SVETypeFlags::EltTyBool32:
10080 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10081 case SVETypeFlags::EltTyBool64:
10082 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10083 }
10084}
10085
10086// Return the llvm vector type corresponding to the specified element TypeFlags.
10087llvm::ScalableVectorType *
10088CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10089 switch (TypeFlags.getEltType()) {
10090 default:
10091 llvm_unreachable("Invalid SVETypeFlag!");
10092
10093 case SVETypeFlags::EltTyInt8:
10094 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10095 case SVETypeFlags::EltTyInt16:
10096 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10097 case SVETypeFlags::EltTyInt32:
10098 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10099 case SVETypeFlags::EltTyInt64:
10100 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10101
10102 case SVETypeFlags::EltTyMFloat8:
10103 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10104 case SVETypeFlags::EltTyFloat16:
10105 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10106 case SVETypeFlags::EltTyBFloat16:
10107 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10108 case SVETypeFlags::EltTyFloat32:
10109 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10110 case SVETypeFlags::EltTyFloat64:
10111 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10112
10113 case SVETypeFlags::EltTyBool8:
10114 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10115 case SVETypeFlags::EltTyBool16:
10116 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10117 case SVETypeFlags::EltTyBool32:
10118 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10119 case SVETypeFlags::EltTyBool64:
10120 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10121 }
10122}
10123
// Emits a call to the aarch64_sve_ptrue intrinsic, overloaded on
// getSVEPredType(TypeFlags), passing the i32 constant 31 (annotated SV_ALL in
// the code) as its only operand, and returns the call.
// NOTE(review): the embedded numbering skips 10125, so the declarator line
// (function name and parameters) is absent from this listing.
10124llvm::Value *
10126 Function *Ptrue =
10127 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10128 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10129}
10130
10131constexpr unsigned SVEBitsPerBlock = 128;
10132
10133static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10134 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10135 return llvm::ScalableVectorType::get(EltTy, NumElts);
10136}
10137
10138// Reinterpret the input predicate so that it can be used to correctly isolate
10139// the elements of the specified datatype.
// Pred is returned untouched when it is an "aarch64.svcount" target type or
// already has the i1 vector type with VTy's element count. Otherwise a
// convert_from_svbool / convert_to_svbool intrinsic call is emitted, chosen by
// VTy's minimum element count, and its result is returned.
// NOTE(review): the embedded numbering skips 10140, so the first declarator
// line (function name and the Pred parameter) is absent from this listing.
10141 llvm::ScalableVectorType *VTy) {
10142
 // svcount values are passed through without any conversion.
10143 if (isa<TargetExtType>(Pred->getType()) &&
10144 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10145 return Pred;
10146
 // RTy is the i1 vector with the same element count as VTy.
10147 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10148 if (Pred->getType() == RTy)
10149 return Pred;
10150
10151 unsigned IntID;
10152 llvm::Type *IntrinsicTy;
10153 switch (VTy->getMinNumElements()) {
10154 default:
10155 llvm_unreachable("unsupported element count!");
10156 case 1:
10157 case 2:
10158 case 4:
10159 case 8:
 // Fewer than 16 lanes: the intrinsic is overloaded on the result type.
10160 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10161 IntrinsicTy = RTy;
10162 break;
10163 case 16:
 // 16 lanes: the intrinsic is overloaded on the incoming predicate type.
10164 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10165 IntrinsicTy = Pred->getType();
10166 break;
10167 }
10168
10169 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10170 Value *C = Builder.CreateCall(F, Pred);
10171 assert(C->getType() == RTy && "Unexpected return type!");
10172 return C;
10173}
10174
// Converts a struct of predicates to the struct type Ty. PredTuple is returned
// as-is when it already has type Ty; otherwise each member is extracted, run
// through EmitSVEPredicateCast with the matching member type of Ty, and
// inserted into a new value that starts out as poison.
// NOTE(review): the embedded numbering skips 10175, so the first declarator
// line (function name and the PredTuple parameter) is absent from this listing.
10176 llvm::StructType *Ty) {
10177 if (PredTuple->getType() == Ty)
10178 return PredTuple;
10179
10180 Value *Ret = llvm::PoisonValue::get(Ty);
10181 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10182 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10183 Pred = EmitSVEPredicateCast(
10184 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10185 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10186 }
10187
10188 return Ret;
10189}
10190
// Emits a call to the gather-load intrinsic IntID. Ops[0] is the predicate,
// Ops[1] the base (vector or scalar) and Ops[2], when present, the offset or
// index. The call result is zero- or sign-extended to getSVEType(TypeFlags)
// according to TypeFlags.isZExtReturn().
// NOTE(review): the embedded numbering skips 10191-10192, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10193 unsigned IntID) {
10194 auto *ResultTy = getSVEType(TypeFlags);
 // Same lane count as the result, but with the in-memory element type.
10195 auto *OverloadedTy =
10196 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10197
10198 Function *F = nullptr;
10199 if (Ops[1]->getType()->isVectorTy())
10200 // This is the "vector base, scalar offset" case. In order to uniquely
10201 // map this built-in to an LLVM IR intrinsic, we need both the return type
10202 // and the type of the vector base.
10203 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10204 else
10205 // This is the "scalar base, vector offset case". The type of the offset
10206 // is encoded in the name of the intrinsic. We only need to specify the
10207 // return type in order to uniquely map this built-in to an LLVM IR
10208 // intrinsic.
10209 F = CGM.getIntrinsic(IntID, OverloadedTy);
10210
10211 // At the ACLE level there's only one predicate type, svbool_t, which is
10212 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10213 // actual type being loaded. For example, when loading doubles (i64) the
10214 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10215 // the predicate and the data being loaded must match. Cast to the type
10216 // expected by the intrinsic. The intrinsic itself should be defined in
10217 // a way that enforces relations between parameter types.
10218 Ops[0] = EmitSVEPredicateCast(
10219 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10220
10221 // Pass 0 when the offset is missing. This can only be applied when using
10222 // the "vector base" addressing mode for which ACLE allows no offset. The
10223 // corresponding LLVM IR always requires an offset.
10224 if (Ops.size() == 2) {
10225 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10226 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10227 }
10228
10229 // For "vector base, scalar index" scale the index so that it becomes a
10230 // scalar offset.
10231 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10232 unsigned BytesPerElt =
10233 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10234 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10235 }
10236
10237 Value *Call = Builder.CreateCall(F, Ops);
10238
10239 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10240 // other cases it's folded into a nop.
10241 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10242 : Builder.CreateSExt(Call, ResultTy);
10243}
10244
// Emits a call to the scatter-store intrinsic IntID. On entry the data to
// store is the last element of Ops; it is moved to the front, after which
// Ops[0] is the data, Ops[1] the predicate, Ops[2] the base (vector or scalar)
// and Ops[3], when present, the offset or index.
// NOTE(review): the embedded numbering skips 10245-10246, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10247 unsigned IntID) {
10248 auto *SrcDataTy = getSVEType(TypeFlags);
 // Same lane count as the source data, but with the in-memory element type.
10249 auto *OverloadedTy =
10250 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10251
10252 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10253 // it's the first argument. Move it accordingly.
10254 Ops.insert(Ops.begin(), Ops.pop_back_val());
10255
10256 Function *F = nullptr;
10257 if (Ops[2]->getType()->isVectorTy())
10258 // This is the "vector base, scalar offset" case. In order to uniquely
10259 // map this built-in to an LLVM IR intrinsic, we need both the return type
10260 // and the type of the vector base.
10261 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10262 else
10263 // This is the "scalar base, vector offset case". The type of the offset
10264 // is encoded in the name of the intrinsic. We only need to specify the
10265 // return type in order to uniquely map this built-in to an LLVM IR
10266 // intrinsic.
10267 F = CGM.getIntrinsic(IntID, OverloadedTy);
10268
10269 // Pass 0 when the offset is missing. This can only be applied when using
10270 // the "vector base" addressing mode for which ACLE allows no offset. The
10271 // corresponding LLVM IR always requires an offset.
10272 if (Ops.size() == 3) {
 // NOTE(review): after the rotation above Ops[1] is the predicate and the
 // base is Ops[2] (see the isVectorTy test on Ops[2]), so this assert looks
 // like it inspects the wrong operand — confirm.
10273 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10274 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10275 }
10276
10277 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10278 // folded into a nop.
10279 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10280
10281 // At the ACLE level there's only one predicate type, svbool_t, which is
10282 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10283 // actual type being stored. For example, when storing doubles (i64) the
10284 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10285 // the predicate and the data being stored must match. Cast to the type
10286 // expected by the intrinsic. The intrinsic itself should be defined in
10287 // a way that enforces relations between parameter types.
10288 Ops[1] = EmitSVEPredicateCast(
10289 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10290
10291 // For "vector base, scalar index" scale the index so that it becomes a
10292 // scalar offset.
10293 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10294 unsigned BytesPerElt =
10295 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10296 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10297 }
10298
10299 return Builder.CreateCall(F, Ops);
10300}
10301
// Emits a call to the gather-prefetch intrinsic IntID. Ops[0] is the
// predicate; the intrinsic is overloaded on the scalable vector type of Ops[1]
// or, if Ops[1] is not a scalable vector, of Ops[2].
// NOTE(review): the embedded numbering skips 10302-10303, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10304 unsigned IntID) {
10305 // The gather prefetches are overloaded on the vector input - this can either
10306 // be the vector of base addresses or vector of offsets.
10307 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10308 if (!OverloadedTy)
10309 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10310
10311 // Cast the predicate from svbool_t to the right number of elements.
10312 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10313
10314 // vector + imm addressing modes
10315 if (Ops[1]->getType()->isVectorTy()) {
10316 if (Ops.size() == 3) {
10317 // Pass 0 for 'vector+imm' when the index is omitted.
10318 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10319
10320 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10321 std::swap(Ops[2], Ops[3]);
10322 } else {
10323 // Index needs to be passed as scaled offset.
10324 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10325 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
 // A shift by log2(1) == 0 would be a no-op, so it is skipped.
10326 if (BytesPerElt > 1)
10327 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10328 }
10329 }
10330
10331 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10332 return Builder.CreateCall(F, Ops);
10333}
10334
// Emits a call to the structured-load intrinsic IntID, overloaded on
// getSVEType(TypeFlags). Ops[0] is the predicate (cast to match that vector
// type), Ops[1] the base pointer and Ops[2], when present, an index applied to
// the base pointer in units of the vector type.
// NOTE(review): the embedded numbering skips 10335-10336, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10337 unsigned IntID) {
10338 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10339 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10340 Value *BasePtr = Ops[1];
10341
10342 // Does the load have an offset?
10343 if (Ops.size() > 2)
10344 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10345
10346 Function *F = CGM.getIntrinsic(IntID, {VTy});
10347 return Builder.CreateCall(F, {Predicate, BasePtr});
10348}
10349
// Emits a call to the structured-store intrinsic IntID, overloaded on
// getSVEType(TypeFlags). The number of data vectors N (2, 3 or 4) is derived
// from IntID; the last N entries of Ops are the data, Ops[0] is the predicate,
// Ops[1] the base pointer and Ops[2] an index when more than 2 + N operands
// are present. The call operands are the N data vectors followed by the
// predicate and the base pointer.
// NOTE(review): the embedded numbering skips 10350-10351 (declarator lines)
// and 10386 (presumably the declaration of Operands) — both are absent from
// this listing.
10352 unsigned IntID) {
10353 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10354
10355 unsigned N;
10356 switch (IntID) {
10357 case Intrinsic::aarch64_sve_st2:
10358 case Intrinsic::aarch64_sve_st1_pn_x2:
10359 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10360 case Intrinsic::aarch64_sve_st2q:
10361 N = 2;
10362 break;
10363 case Intrinsic::aarch64_sve_st3:
10364 case Intrinsic::aarch64_sve_st3q:
10365 N = 3;
10366 break;
10367 case Intrinsic::aarch64_sve_st4:
10368 case Intrinsic::aarch64_sve_st1_pn_x4:
10369 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10370 case Intrinsic::aarch64_sve_st4q:
10371 N = 4;
10372 break;
10373 default:
10374 llvm_unreachable("unknown intrinsic!");
10375 }
10376
10377 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10378 Value *BasePtr = Ops[1];
10379
10380 // Does the store have an offset?
10381 if (Ops.size() > (2 + N))
10382 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10383
10384 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10385 // need to break up the tuple vector.
10387 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10388 Operands.push_back(Ops[I]);
10389 Operands.append({Predicate, BasePtr});
10390 Function *F = CGM.getIntrinsic(IntID, { VTy });
10391
10392 return Builder.CreateCall(F, Operands);
10393}
10394
10395// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10396// svpmullt_pair intrinsics, with the exception that their results are bitcast
10397// to a wider type.
// The intrinsic is overloaded on the type of Ops[0] and called with
// {Ops[0], Ops[1]}; the result is reinterpreted as getSVEType(TypeFlags).
// NOTE(review): the embedded numbering skips 10398-10399, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10400 unsigned BuiltinID) {
10401 // Splat scalar operand to vector (intrinsics with _n infix)
10402 if (TypeFlags.hasSplatOperand()) {
10403 unsigned OpNo = TypeFlags.getSplatOperand();
10404 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10405 }
10406
10407 // The pair-wise function has a narrower overloaded type.
10408 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10409 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10410
10411 // Now bitcast to the wider result type.
10412 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10413 return EmitSVEReinterpret(Call, Ty);
10414}
10415
// Emits a call to the intrinsic BuiltinID, overloaded on
// getSVEType(TypeFlags), with operands {Ops[0], i32 0}.
// NOTE(review): the embedded numbering skips 10416, so the first declarator
// line (function name and leading parameter) is absent from this listing.
10417 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10418 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10419 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10420 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10421}
10421}
10422
// Emits a call to the prefetch intrinsic BuiltinID, overloaded on the type of
// the converted predicate. Ops[0] is the predicate, Ops[1] the base pointer,
// the last element of Ops the prefetch operation, and Ops[2] an index when
// more than three operands are present.
// NOTE(review): the embedded numbering skips 10423-10424, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10425 unsigned BuiltinID) {
10426 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10427 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10428 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10429
10430 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10431 Value *BasePtr = Ops[1];
10432
10433 // Implement the index operand if not omitted.
10434 if (Ops.size() > 3)
10435 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10436
10437 Value *PrfOp = Ops.back();
10438
10439 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10440 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10441}
10442
// Emits a predicated load through the intrinsic IntrinsicID and decorates the
// call with TBAA information for the pointee type of the builtin's second
// argument. Ops[0] is the predicate, Ops[1] the base pointer and Ops[2], when
// present, an index applied in units of MemoryTy. For ld1uwq/ld1udq the call
// is returned directly; otherwise it is zero- or sign-extended to ReturnTy
// according to IsZExtReturn.
// NOTE(review): the embedded numbering skips 10443 and 10445, so the first
// declarator line and one parameter line are absent from this listing.
10444 llvm::Type *ReturnTy,
10446 unsigned IntrinsicID,
10447 bool IsZExtReturn) {
 // The memory element type is the pointee type of the pointer argument.
10448 QualType LangPTy = E->getArg(1)->getType();
10449 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10450 LangPTy->castAs<PointerType>()->getPointeeType());
10451
10452 // The vector type that is returned may be different from the
10453 // eventual type loaded from memory.
10454 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10455 llvm::ScalableVectorType *MemoryTy = nullptr;
10456 llvm::ScalableVectorType *PredTy = nullptr;
10457 bool IsQuadLoad = false;
10458 switch (IntrinsicID) {
10459 case Intrinsic::aarch64_sve_ld1uwq:
10460 case Intrinsic::aarch64_sve_ld1udq:
 // These two use single-lane scalable memory and predicate types.
10461 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10462 PredTy = llvm::ScalableVectorType::get(
10463 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10464 IsQuadLoad = true;
10465 break;
10466 default:
 // Memory type keeps the result's lane count but uses the memory element.
10467 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10468 PredTy = MemoryTy;
10469 break;
10470 }
10471
10472 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10473 Value *BasePtr = Ops[1];
10474
10475 // Does the load have an offset?
10476 if (Ops.size() > 2)
10477 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10478
10479 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10480 auto *Load =
10481 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10482 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10483 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10484
10485 if (IsQuadLoad)
10486 return Load;
10487
10488 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10489 : Builder.CreateSExt(Load, VectorTy);
10490}
10491
// Emits a predicated store through the intrinsic IntrinsicID and decorates the
// call with TBAA information for the pointee type of the builtin's second
// argument. Ops[0] is the predicate, Ops[1] the base pointer, the last element
// of Ops the data, and Ops[2] an index when exactly four operands are present.
// For st1wq/st1dq the data is stored untruncated; otherwise it is truncated to
// MemoryTy first.
// NOTE(review): the embedded numbering skips 10492-10493, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10494 unsigned IntrinsicID) {
 // The memory element type is the pointee type of the pointer argument.
10495 QualType LangPTy = E->getArg(1)->getType();
10496 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10497 LangPTy->castAs<PointerType>()->getPointeeType());
10498
10499 // The vector type that is stored may be different from the
10500 // eventual type stored to memory.
10501 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10502 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10503
10504 auto PredTy = MemoryTy;
10505 auto AddrMemoryTy = MemoryTy;
10506 bool IsQuadStore = false;
10507
10508 switch (IntrinsicID) {
10509 case Intrinsic::aarch64_sve_st1wq:
10510 case Intrinsic::aarch64_sve_st1dq:
 // These two use single-lane scalable addressing and predicate types.
10511 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10512 PredTy =
10513 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10514 IsQuadStore = true;
10515 break;
10516 default:
10517 break;
10518 }
10519 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10520 Value *BasePtr = Ops[1];
10521
10522 // Does the store have an offset?
10523 if (Ops.size() == 4)
10524 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10525
10526 // Last value is always the data
10527 Value *Val =
10528 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10529
10530 Function *F =
10531 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10532 auto *Store =
10533 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10534 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10535 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10536 return Store;
10537}
10538
// Emits a call to the intrinsic IntID with operands
// {predicate (Ops[2]), pointer, Ops[0], slice}. With four operands the pointer
// is Ops[3] and the slice is Ops[1]; with five operands Ops[4] (vnum) adjusts
// both, as described below.
// NOTE(review): the embedded numbering skips 10539-10540 (declarator lines)
// and 10543 (the arguments of the EmitSVEPredicateCast call) — both are absent
// from this listing.
10541 unsigned IntID) {
10542 Ops[2] = EmitSVEPredicateCast(
10544
10545 SmallVector<Value *> NewOps;
10546 NewOps.push_back(Ops[2]);
10547
10548 llvm::Value *BasePtr = Ops[3];
10549 llvm::Value *RealSlice = Ops[1];
10550 // If the intrinsic contains the vnum parameter, multiply it with the vector
10551 // size in bytes.
10552 if (Ops.size() == 5) {
10553 Function *StreamingVectorLength =
10554 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10555 llvm::Value *StreamingVectorLengthCall =
10556 Builder.CreateCall(StreamingVectorLength);
10557 llvm::Value *Mulvl =
10558 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10559 // The type of the ptr parameter is void *, so use Int8Ty here.
10560 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
 // Slice = trunc(zext(slice) + vnum): widen to i64, add, narrow to i32.
10561 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10562 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10563 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10564 }
10565 NewOps.push_back(BasePtr);
10566 NewOps.push_back(Ops[0]);
10567 NewOps.push_back(RealSlice);
10568 Function *F = CGM.getIntrinsic(IntID);
10569 return Builder.CreateCall(F, NewOps);
10570}
10571
// Emits a call to the intrinsic IntID, overloaded on getSVEType(TypeFlags),
// after casting the predicate operand to match that vector type: Ops[1] when
// TypeFlags.isReadZA(), otherwise Ops[2] when TypeFlags.isWriteZA().
// NOTE(review): the embedded numbering skips 10572-10573, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10574 unsigned IntID) {
10575 auto *VecTy = getSVEType(TypeFlags);
10576 Function *F = CGM.getIntrinsic(IntID, VecTy);
10577 if (TypeFlags.isReadZA())
10578 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10579 else if (TypeFlags.isWriteZA())
10580 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10581 return Builder.CreateCall(F, Ops);
10582}
10583
// Emits a call to the (non-overloaded) intrinsic IntID with Ops; when Ops is
// empty, the i32 constant 255 is supplied as the single operand.
// NOTE(review): the embedded numbering skips 10584-10585, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10586 unsigned IntID) {
10587 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
10588 if (Ops.size() == 0)
10589 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10590 Function *F = CGM.getIntrinsic(IntID, {});
10591 return Builder.CreateCall(F, Ops);
10592}
10593
// Emits a call to the (non-overloaded) intrinsic IntID with three operands:
// when only two are supplied an i32 0 is appended, otherwise Ops[2] is
// converted to i32 with a signed integer cast.
// NOTE(review): the embedded numbering skips 10594-10595, so the declarator
// lines (function name and leading parameters) are absent from this listing.
10596 unsigned IntID) {
10597 if (Ops.size() == 2)
10598 Ops.push_back(Builder.getInt32(0));
10599 else
10600 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10601 Function *F = CGM.getIntrinsic(IntID, {});
10602 return Builder.CreateCall(F, Ops);
10603}
10604
10605// Limit the usage of scalable llvm IR generated by the ACLE by using the
10606// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10607Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10608 return Builder.CreateVectorSplat(
10609 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10610}
10611
// Splats Scalar across the scalable vector type that
// getSVEVectorForElementType derives from its type. If Scalar is itself
// vector-typed, element 0 is extracted first and used as the scalar; debug
// builds assert that such an input is a single-element vector of Int8Ty.
// NOTE(review): the embedded numbering skips 10612, so the declarator line
// (function name and parameters) is absent from this listing.
10613 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10614#ifndef NDEBUG
10615 auto *VecTy = cast<llvm::VectorType>(Ty);
10616 ElementCount EC = VecTy->getElementCount();
10617 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10618 "Only <1 x i8> expected");
10619#endif
10620 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10621 }
10622 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10623}
10624
10625Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10626 // FIXME: For big endian this needs an additional REV, or needs a separate
10627 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10628 // instruction is defined as 'bitwise' equivalent from memory point of
10629 // view (when storing/reloading), whereas the svreinterpret builtin
10630 // implements bitwise equivalent cast from register point of view.
10631 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10632
10633 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10634 Value *Tuple = llvm::PoisonValue::get(Ty);
10635
10636 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10637 Value *In = Builder.CreateExtractValue(Val, I);
10638 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10639 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10640 }
10641
10642 return Tuple;
10643 }
10644
10645 return Builder.CreateBitCast(Val, Ty);
10646}
10647
// Prepends the null (all-zero) constant of type Ty to Ops. Builder is not used
// in the visible body.
// NOTE(review): the embedded numbering skips 10649, so the declarator line
// that introduces the Ops parameter is absent from this listing.
10648static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10650 auto *SplatZero = Constant::getNullValue(Ty);
10651 Ops.insert(Ops.begin(), SplatZero);
10652}
10653
// Prepends an undef value of type Ty to Ops. Builder is not used in the
// visible body.
// NOTE(review): the embedded numbering skips 10655, so the declarator line
// that introduces the Ops parameter is absent from this listing.
10654static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10656 auto *SplatUndef = UndefValue::get(Ty);
10657 Ops.insert(Ops.begin(), SplatUndef);
10658}
10659
// Selects the list of types used to resolve an overloaded SVE intrinsic, based
// on the overload kind recorded in TypeFlags. The checks run in order and the
// first match wins; if none matches, the overload kind must be "default" and
// the list is just getSVEType(TypeFlags).
// NOTE(review): the embedded numbering skips 10660-10661, so the declarator
// lines (return type, function name, leading parameter) are absent from this
// listing.
10662 llvm::Type *ResultType,
10663 ArrayRef<Value *> Ops) {
 // Non-overloaded intrinsics need no types at all.
10664 if (TypeFlags.isOverloadNone())
10665 return {};
10666
10667 llvm::Type *DefaultType = getSVEType(TypeFlags);
10668
10669 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10670 return {DefaultType, Ops[1]->getType()};
10671
10672 if (TypeFlags.isOverloadWhileRW())
10673 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10674
10675 if (TypeFlags.isOverloadCvt())
10676 return {Ops[0]->getType(), Ops.back()->getType()};
10677
 // QV reductions whose result is a non-scalable vector are overloaded on the
 // result type plus the type of Ops[1].
10678 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10679 ResultType->isVectorTy())
10680 return {ResultType, Ops[1]->getType()};
10681
10682 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10683 return {DefaultType};
10684}
10685
// Implements tuple set/get on the aggregate in Ops[0]. Ops[1] must be a
// ConstantInt giving the member index. For a set, Ops[2] is inserted at that
// index and the updated aggregate is returned; for a get, the member at that
// index is extracted and returned.
// NOTE(review): the embedded numbering skips 10686, so the first declarator
// line (function name and leading parameter) is absent from this listing.
10687 ArrayRef<Value *> Ops) {
10688 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10689 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10690 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10691
10692 if (TypeFlags.isTupleSet())
10693 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10694 return Builder.CreateExtractValue(Ops[0], Idx);
10695}
10696
// Builds a value of aggregate type Ty by starting from poison and inserting
// Ops[Idx] at member index Idx for every element of Ops, in order.
// NOTE(review): the embedded numbering skips 10697, so the first declarator
// line (function name and leading parameter) is absent from this listing.
10698 llvm::Type *Ty,
10699 ArrayRef<Value *> Ops) {
10700 assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
10701
10702 Value *Tuple = llvm::PoisonValue::get(Ty);
10703 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10704 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10705
10706 return Tuple;
10707}
10708
// Evaluates every argument of the call expression E and appends the resulting
// values to Ops:
//  - arguments flagged as integer constant expressions are constant-folded and
//    appended as 32-bit ConstantInts;
//  - struct-typed arguments are split into their members, unless the builtin
//    is a tuple set/get;
//  - everything else is appended unchanged.
// NOTE(review): the embedded numbering skips 10709 (first declarator line) and
// 10714 (presumably the declaration of Error) — both are absent from this
// listing.
10710 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10711 SVETypeFlags TypeFlags) {
10712 // Find out if any arguments are required to be integer constant expressions.
10713 unsigned ICEArguments = 0;
10715 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10716 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10717
10718 // Tuple set/get only requires one insert/extract vector, which is
10719 // created by EmitSVETupleSetOrGet.
10720 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10721
10722 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
 // Bit i of ICEArguments marks argument i as a required constant.
10723 bool IsICE = ICEArguments & (1 << i);
10724 Value *Arg = EmitScalarExpr(E->getArg(i));
10725
10726 if (IsICE) {
10727 // If this is required to be a constant, constant fold it so that we know
10728 // that the generated intrinsic gets a ConstantInt.
10729 std::optional<llvm::APSInt> Result =
10730 E->getArg(i)->getIntegerConstantExpr(getContext());
10731 assert(Result && "Expected argument to be a constant");
10732
10733 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10734 // truncate because the immediate has been range checked and no valid
10735 // immediate requires more than a handful of bits.
10736 *Result = Result->extOrTrunc(32);
10737 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10738 continue;
10739 }
10740
 // Flatten struct arguments into one operand per member.
10741 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10742 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10743 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10744
10745 continue;
10746 }
10747
10748 Ops.push_back(Arg);
10749 }
10750}
10751
10753 const CallExpr *E) {
10754 llvm::Type *Ty = ConvertType(E->getType());
10755 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10756 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10757 Value *Val = EmitScalarExpr(E->getArg(0));
10758 return EmitSVEReinterpret(Val, Ty);
10759 }
10760
10761 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10763
10765 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10766 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10767
10768 if (TypeFlags.isLoad())
10769 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10770 TypeFlags.isZExtReturn());
10771 else if (TypeFlags.isStore())
10772 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10773 else if (TypeFlags.isGatherLoad())
10774 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10775 else if (TypeFlags.isScatterStore())
10776 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10777 else if (TypeFlags.isPrefetch())
10778 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10779 else if (TypeFlags.isGatherPrefetch())
10780 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10781 else if (TypeFlags.isStructLoad())
10782 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10783 else if (TypeFlags.isStructStore())
10784 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10785 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10786 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10787 else if (TypeFlags.isTupleCreate())
10788 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10789 else if (TypeFlags.isUndef())
10790 return UndefValue::get(Ty);
10791 else if (Builtin->LLVMIntrinsic != 0) {
10792 // Emit set FPMR for intrinsics that require it
10793 if (TypeFlags.setsFPMR())
10794 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10795 Ops.pop_back_val());
10796 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10798
10799 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10801
10802 // Some ACLE builtins leave out the argument to specify the predicate
10803 // pattern, which is expected to be expanded to an SV_ALL pattern.
10804 if (TypeFlags.isAppendSVALL())
10805 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10806 if (TypeFlags.isInsertOp1SVALL())
10807 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10808
10809 // Predicates must match the main datatype.
10810 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10811 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10812 if (PredTy->getElementType()->isIntegerTy(1))
10813 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10814
10815 // Splat scalar operand to vector (intrinsics with _n infix)
10816 if (TypeFlags.hasSplatOperand()) {
10817 unsigned OpNo = TypeFlags.getSplatOperand();
10818 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10819 }
10820
10821 if (TypeFlags.isReverseCompare())
10822 std::swap(Ops[1], Ops[2]);
10823 else if (TypeFlags.isReverseUSDOT())
10824 std::swap(Ops[1], Ops[2]);
10825 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10826 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10827 std::swap(Ops[1], Ops[2]);
10828 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10829 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10830 std::swap(Ops[1], Ops[3]);
10831
10832 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10833 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10834 llvm::Type *OpndTy = Ops[1]->getType();
10835 auto *SplatZero = Constant::getNullValue(OpndTy);
10836 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10837 }
10838
10839 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10840 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10841 Value *Call = Builder.CreateCall(F, Ops);
10842
10843 if (Call->getType() == Ty)
10844 return Call;
10845
10846 // Predicate results must be converted to svbool_t.
10847 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10848 return EmitSVEPredicateCast(Call, PredTy);
10849 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10850 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10851
10852 llvm_unreachable("unsupported element count!");
10853 }
10854
10855 switch (BuiltinID) {
10856 default:
10857 return nullptr;
10858
10859 case SVE::BI__builtin_sve_svreinterpret_b: {
10860 auto SVCountTy =
10861 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10862 Function *CastFromSVCountF =
10863 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10864 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10865 }
10866 case SVE::BI__builtin_sve_svreinterpret_c: {
10867 auto SVCountTy =
10868 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10869 Function *CastToSVCountF =
10870 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10871 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10872 }
10873
10874 case SVE::BI__builtin_sve_svpsel_lane_b8:
10875 case SVE::BI__builtin_sve_svpsel_lane_b16:
10876 case SVE::BI__builtin_sve_svpsel_lane_b32:
10877 case SVE::BI__builtin_sve_svpsel_lane_b64:
10878 case SVE::BI__builtin_sve_svpsel_lane_c8:
10879 case SVE::BI__builtin_sve_svpsel_lane_c16:
10880 case SVE::BI__builtin_sve_svpsel_lane_c32:
10881 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10882 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10883 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10884 "aarch64.svcount")) &&
10885 "Unexpected TargetExtType");
10886 auto SVCountTy =
10887 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10888 Function *CastFromSVCountF =
10889 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10890 Function *CastToSVCountF =
10891 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10892
10893 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10894 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10895 llvm::Value *Ops0 =
10896 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10897 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10898 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10899 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10900 }
10901 case SVE::BI__builtin_sve_svmov_b_z: {
10902 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10903 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10904 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10905 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10906 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10907 }
10908
10909 case SVE::BI__builtin_sve_svnot_b_z: {
10910 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10911 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10912 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10913 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10914 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10915 }
10916
10917 case SVE::BI__builtin_sve_svmovlb_u16:
10918 case SVE::BI__builtin_sve_svmovlb_u32:
10919 case SVE::BI__builtin_sve_svmovlb_u64:
10920 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10921
10922 case SVE::BI__builtin_sve_svmovlb_s16:
10923 case SVE::BI__builtin_sve_svmovlb_s32:
10924 case SVE::BI__builtin_sve_svmovlb_s64:
10925 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10926
10927 case SVE::BI__builtin_sve_svmovlt_u16:
10928 case SVE::BI__builtin_sve_svmovlt_u32:
10929 case SVE::BI__builtin_sve_svmovlt_u64:
10930 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10931
10932 case SVE::BI__builtin_sve_svmovlt_s16:
10933 case SVE::BI__builtin_sve_svmovlt_s32:
10934 case SVE::BI__builtin_sve_svmovlt_s64:
10935 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10936
10937 case SVE::BI__builtin_sve_svpmullt_u16:
10938 case SVE::BI__builtin_sve_svpmullt_u64:
10939 case SVE::BI__builtin_sve_svpmullt_n_u16:
10940 case SVE::BI__builtin_sve_svpmullt_n_u64:
10941 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10942
10943 case SVE::BI__builtin_sve_svpmullb_u16:
10944 case SVE::BI__builtin_sve_svpmullb_u64:
10945 case SVE::BI__builtin_sve_svpmullb_n_u16:
10946 case SVE::BI__builtin_sve_svpmullb_n_u64:
10947 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10948
10949 case SVE::BI__builtin_sve_svdup_n_b8:
10950 case SVE::BI__builtin_sve_svdup_n_b16:
10951 case SVE::BI__builtin_sve_svdup_n_b32:
10952 case SVE::BI__builtin_sve_svdup_n_b64: {
10953 Value *CmpNE =
10954 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10955 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10956 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10957 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10958 }
10959
10960 case SVE::BI__builtin_sve_svdupq_n_b8:
10961 case SVE::BI__builtin_sve_svdupq_n_b16:
10962 case SVE::BI__builtin_sve_svdupq_n_b32:
10963 case SVE::BI__builtin_sve_svdupq_n_b64:
10964 case SVE::BI__builtin_sve_svdupq_n_u8:
10965 case SVE::BI__builtin_sve_svdupq_n_s8:
10966 case SVE::BI__builtin_sve_svdupq_n_u64:
10967 case SVE::BI__builtin_sve_svdupq_n_f64:
10968 case SVE::BI__builtin_sve_svdupq_n_s64:
10969 case SVE::BI__builtin_sve_svdupq_n_u16:
10970 case SVE::BI__builtin_sve_svdupq_n_f16:
10971 case SVE::BI__builtin_sve_svdupq_n_bf16:
10972 case SVE::BI__builtin_sve_svdupq_n_s16:
10973 case SVE::BI__builtin_sve_svdupq_n_u32:
10974 case SVE::BI__builtin_sve_svdupq_n_f32:
10975 case SVE::BI__builtin_sve_svdupq_n_s32: {
10976 // These builtins are implemented by storing each element to an array and using
10977 // ld1rq to materialize a vector.
10978 unsigned NumOpnds = Ops.size();
10979
10980 bool IsBoolTy =
10981 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10982
10983 // For svdupq_n_b* the element type of is an integer of type 128/numelts,
10984 // so that the compare can use the width that is natural for the expected
10985 // number of predicate lanes.
10986 llvm::Type *EltTy = Ops[0]->getType();
10987 if (IsBoolTy)
10988 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10989
10991 for (unsigned I = 0; I < NumOpnds; ++I)
10992 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10993 Value *Vec = BuildVector(VecOps);
10994
10995 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10996 Value *InsertSubVec = Builder.CreateInsertVector(
10997 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10998
10999 Function *F =
11000 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11001 Value *DupQLane =
11002 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11003
11004 if (!IsBoolTy)
11005 return DupQLane;
11006
11007 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11008 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11009
11010 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11011 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11012 : Intrinsic::aarch64_sve_cmpne_wide,
11013 OverloadedTy);
11014 Value *Call = Builder.CreateCall(
11015 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11016 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11017 }
11018
11019 case SVE::BI__builtin_sve_svpfalse_b:
11020 return ConstantInt::getFalse(Ty);
11021
11022 case SVE::BI__builtin_sve_svpfalse_c: {
11023 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11024 Function *CastToSVCountF =
11025 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11026 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11027 }
11028
11029 case SVE::BI__builtin_sve_svlen_bf16:
11030 case SVE::BI__builtin_sve_svlen_f16:
11031 case SVE::BI__builtin_sve_svlen_f32:
11032 case SVE::BI__builtin_sve_svlen_f64:
11033 case SVE::BI__builtin_sve_svlen_s8:
11034 case SVE::BI__builtin_sve_svlen_s16:
11035 case SVE::BI__builtin_sve_svlen_s32:
11036 case SVE::BI__builtin_sve_svlen_s64:
11037 case SVE::BI__builtin_sve_svlen_u8:
11038 case SVE::BI__builtin_sve_svlen_u16:
11039 case SVE::BI__builtin_sve_svlen_u32:
11040 case SVE::BI__builtin_sve_svlen_u64: {
11041 SVETypeFlags TF(Builtin->TypeModifier);
11042 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11043 auto *NumEls =
11044 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11045
11046 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11047 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11048 }
11049
11050 case SVE::BI__builtin_sve_svtbl2_u8:
11051 case SVE::BI__builtin_sve_svtbl2_s8:
11052 case SVE::BI__builtin_sve_svtbl2_u16:
11053 case SVE::BI__builtin_sve_svtbl2_s16:
11054 case SVE::BI__builtin_sve_svtbl2_u32:
11055 case SVE::BI__builtin_sve_svtbl2_s32:
11056 case SVE::BI__builtin_sve_svtbl2_u64:
11057 case SVE::BI__builtin_sve_svtbl2_s64:
11058 case SVE::BI__builtin_sve_svtbl2_f16:
11059 case SVE::BI__builtin_sve_svtbl2_bf16:
11060 case SVE::BI__builtin_sve_svtbl2_f32:
11061 case SVE::BI__builtin_sve_svtbl2_f64: {
11062 SVETypeFlags TF(Builtin->TypeModifier);
11063 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11064 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11065 return Builder.CreateCall(F, Ops);
11066 }
11067
11068 case SVE::BI__builtin_sve_svset_neonq_s8:
11069 case SVE::BI__builtin_sve_svset_neonq_s16:
11070 case SVE::BI__builtin_sve_svset_neonq_s32:
11071 case SVE::BI__builtin_sve_svset_neonq_s64:
11072 case SVE::BI__builtin_sve_svset_neonq_u8:
11073 case SVE::BI__builtin_sve_svset_neonq_u16:
11074 case SVE::BI__builtin_sve_svset_neonq_u32:
11075 case SVE::BI__builtin_sve_svset_neonq_u64:
11076 case SVE::BI__builtin_sve_svset_neonq_f16:
11077 case SVE::BI__builtin_sve_svset_neonq_f32:
11078 case SVE::BI__builtin_sve_svset_neonq_f64:
11079 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11080 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11081 }
11082
11083 case SVE::BI__builtin_sve_svget_neonq_s8:
11084 case SVE::BI__builtin_sve_svget_neonq_s16:
11085 case SVE::BI__builtin_sve_svget_neonq_s32:
11086 case SVE::BI__builtin_sve_svget_neonq_s64:
11087 case SVE::BI__builtin_sve_svget_neonq_u8:
11088 case SVE::BI__builtin_sve_svget_neonq_u16:
11089 case SVE::BI__builtin_sve_svget_neonq_u32:
11090 case SVE::BI__builtin_sve_svget_neonq_u64:
11091 case SVE::BI__builtin_sve_svget_neonq_f16:
11092 case SVE::BI__builtin_sve_svget_neonq_f32:
11093 case SVE::BI__builtin_sve_svget_neonq_f64:
11094 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11095 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11096 }
11097
11098 case SVE::BI__builtin_sve_svdup_neonq_s8:
11099 case SVE::BI__builtin_sve_svdup_neonq_s16:
11100 case SVE::BI__builtin_sve_svdup_neonq_s32:
11101 case SVE::BI__builtin_sve_svdup_neonq_s64:
11102 case SVE::BI__builtin_sve_svdup_neonq_u8:
11103 case SVE::BI__builtin_sve_svdup_neonq_u16:
11104 case SVE::BI__builtin_sve_svdup_neonq_u32:
11105 case SVE::BI__builtin_sve_svdup_neonq_u64:
11106 case SVE::BI__builtin_sve_svdup_neonq_f16:
11107 case SVE::BI__builtin_sve_svdup_neonq_f32:
11108 case SVE::BI__builtin_sve_svdup_neonq_f64:
11109 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11110 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11111 Builder.getInt64(0));
11112 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11113 {Insert, Builder.getInt64(0)});
11114 }
11115 }
11116
11117 /// Should not happen
11118 return nullptr;
11119}
11120
11121static void swapCommutativeSMEOperands(unsigned BuiltinID,
11123 unsigned MultiVec;
11124 switch (BuiltinID) {
11125 default:
11126 return;
11127 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11128 MultiVec = 1;
11129 break;
11130 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11131 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11132 MultiVec = 2;
11133 break;
11134 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11135 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11136 MultiVec = 4;
11137 break;
11138 }
11139
11140 if (MultiVec > 0)
11141 for (unsigned I = 0; I < MultiVec; ++I)
11142 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11143}
11144
11146 const CallExpr *E) {
11147 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11149
11151 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11152 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11153
11154 if (TypeFlags.isLoad() || TypeFlags.isStore())
11155 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11156 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11157 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11158 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11159 BuiltinID == SME::BI__builtin_sme_svzero_za)
11160 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11161 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11162 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11163 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11164 BuiltinID == SME::BI__builtin_sme_svstr_za)
11165 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11166
11167 // Emit set FPMR for intrinsics that require it
11168 if (TypeFlags.setsFPMR())
11169 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11170 Ops.pop_back_val());
11171 // Handle builtins which require their multi-vector operands to be swapped
11172 swapCommutativeSMEOperands(BuiltinID, Ops);
11173
11174 // Should not happen!
11175 if (Builtin->LLVMIntrinsic == 0)
11176 return nullptr;
11177
11178 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11179 // If we already know the streaming mode, don't bother with the intrinsic
11180 // and emit a constant instead
11181 const auto *FD = cast<FunctionDecl>(CurFuncDecl);
11182 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
11183 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11184 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
11185 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
11186 return ConstantInt::getBool(Builder.getContext(), IsStreaming);
11187 }
11188 }
11189 }
11190
11191 // Predicates must match the main datatype.
11192 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11193 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11194 if (PredTy->getElementType()->isIntegerTy(1))
11195 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11196
11197 Function *F =
11198 TypeFlags.isOverloadNone()
11199 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11200 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11201
11202 return Builder.CreateCall(F, Ops);
11203}
11204
11206 const CallExpr *E,
11207 llvm::Triple::ArchType Arch) {
11208 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11209 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11210 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11211
11212 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11213 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11214 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11215
11216 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11217 return EmitAArch64CpuSupports(E);
11218
11219 unsigned HintID = static_cast<unsigned>(-1);
11220 switch (BuiltinID) {
11221 default: break;
11222 case clang::AArch64::BI__builtin_arm_nop:
11223 HintID = 0;
11224 break;
11225 case clang::AArch64::BI__builtin_arm_yield:
11226 case clang::AArch64::BI__yield:
11227 HintID = 1;
11228 break;
11229 case clang::AArch64::BI__builtin_arm_wfe:
11230 case clang::AArch64::BI__wfe:
11231 HintID = 2;
11232 break;
11233 case clang::AArch64::BI__builtin_arm_wfi:
11234 case clang::AArch64::BI__wfi:
11235 HintID = 3;
11236 break;
11237 case clang::AArch64::BI__builtin_arm_sev:
11238 case clang::AArch64::BI__sev:
11239 HintID = 4;
11240 break;
11241 case clang::AArch64::BI__builtin_arm_sevl:
11242 case clang::AArch64::BI__sevl:
11243 HintID = 5;
11244 break;
11245 }
11246
11247 if (HintID != static_cast<unsigned>(-1)) {
11248 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11249 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11250 }
11251
11252 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11253 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11254 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11255 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11256 }
11257
11258 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11259 // Create call to __arm_sme_state and store the results to the two pointers.
11261 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11262 false),
11263 "__arm_sme_state"));
11264 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11265 "aarch64_pstate_sm_compatible");
11266 CI->setAttributes(Attrs);
11267 CI->setCallingConv(
11268 llvm::CallingConv::
11269 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11270 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11271 EmitPointerWithAlignment(E->getArg(0)));
11272 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11273 EmitPointerWithAlignment(E->getArg(1)));
11274 }
11275
11276 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11277 assert((getContext().getTypeSize(E->getType()) == 32) &&
11278 "rbit of unusual size!");
11279 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11280 return Builder.CreateCall(
11281 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11282 }
11283 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11284 assert((getContext().getTypeSize(E->getType()) == 64) &&
11285 "rbit of unusual size!");
11286 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11287 return Builder.CreateCall(
11288 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11289 }
11290
11291 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11292 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11293 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11294 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11295 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11296 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11297 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11298 return Res;
11299 }
11300
11301 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11302 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11303 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11304 "cls");
11305 }
11306 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11307 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11308 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11309 "cls");
11310 }
11311
11312 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11313 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11314 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11315 llvm::Type *Ty = Arg->getType();
11316 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11317 Arg, "frint32z");
11318 }
11319
11320 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11321 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11322 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11323 llvm::Type *Ty = Arg->getType();
11324 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11325 Arg, "frint64z");
11326 }
11327
11328 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11329 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11330 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11331 llvm::Type *Ty = Arg->getType();
11332 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11333 Arg, "frint32x");
11334 }
11335
11336 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11337 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11338 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11339 llvm::Type *Ty = Arg->getType();
11340 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11341 Arg, "frint64x");
11342 }
11343
11344 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11345 assert((getContext().getTypeSize(E->getType()) == 32) &&
11346 "__jcvt of unusual size!");
11347 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11348 return Builder.CreateCall(
11349 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11350 }
11351
11352 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11353 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11354 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11355 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11356 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11357 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11358
11359 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11360 // Load from the address via an LLVM intrinsic, receiving a
11361 // tuple of 8 i64 words, and store each one to ValPtr.
11362 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11363 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11364 llvm::Value *ToRet;
11365 for (size_t i = 0; i < 8; i++) {
11366 llvm::Value *ValOffsetPtr =
11367 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11368 Address Addr =
11369 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11370 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11371 }
11372 return ToRet;
11373 } else {
11374 // Load 8 i64 words from ValPtr, and store them to the address
11375 // via an LLVM intrinsic.
11377 Args.push_back(MemAddr);
11378 for (size_t i = 0; i < 8; i++) {
11379 llvm::Value *ValOffsetPtr =
11380 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11381 Address Addr =
11382 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11383 Args.push_back(Builder.CreateLoad(Addr));
11384 }
11385
11386 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11387 ? Intrinsic::aarch64_st64b
11388 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11389 ? Intrinsic::aarch64_st64bv
11390 : Intrinsic::aarch64_st64bv0);
11391 Function *F = CGM.getIntrinsic(Intr);
11392 return Builder.CreateCall(F, Args);
11393 }
11394 }
11395
11396 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11397 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11398
11399 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11400 ? Intrinsic::aarch64_rndr
11401 : Intrinsic::aarch64_rndrrs);
11402 Function *F = CGM.getIntrinsic(Intr);
11403 llvm::Value *Val = Builder.CreateCall(F);
11404 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11405 Value *Status = Builder.CreateExtractValue(Val, 1);
11406
11407 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11408 Builder.CreateStore(RandomValue, MemAddress);
11409 Status = Builder.CreateZExt(Status, Int32Ty);
11410 return Status;
11411 }
11412
11413 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11414 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11415 const FunctionDecl *FD = E->getDirectCallee();
11416 Value *Ops[2];
11417 for (unsigned i = 0; i < 2; i++)
11418 Ops[i] = EmitScalarExpr(E->getArg(i));
11419 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11420 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11421 StringRef Name = FD->getName();
11422 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11423 }
11424
11425 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11426 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11427 getContext().getTypeSize(E->getType()) == 128) {
11428 Function *F =
11429 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11430 ? Intrinsic::aarch64_ldaxp
11431 : Intrinsic::aarch64_ldxp);
11432
11433 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11434 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11435
11436 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11437 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11438 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11439 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11440 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11441
11442 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11443 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11444 Val = Builder.CreateOr(Val, Val1);
11445 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11446 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11447 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11448 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11449
11450 QualType Ty = E->getType();
11451 llvm::Type *RealResTy = ConvertType(Ty);
11452 llvm::Type *IntTy =
11453 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11454
11455 Function *F =
11456 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11457 ? Intrinsic::aarch64_ldaxr
11458 : Intrinsic::aarch64_ldxr,
11459 UnqualPtrTy);
11460 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11461 Val->addParamAttr(
11462 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11463
11464 if (RealResTy->isPointerTy())
11465 return Builder.CreateIntToPtr(Val, RealResTy);
11466
11467 llvm::Type *IntResTy = llvm::IntegerType::get(
11468 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11469 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11470 RealResTy);
11471 }
11472
11473 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11474 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11475 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11476 Function *F =
11477 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11478 ? Intrinsic::aarch64_stlxp
11479 : Intrinsic::aarch64_stxp);
11480 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11481
11482 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11483 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11484
11485 Tmp = Tmp.withElementType(STy);
11486 llvm::Value *Val = Builder.CreateLoad(Tmp);
11487
11488 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11489 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11490 Value *StPtr = EmitScalarExpr(E->getArg(1));
11491 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11492 }
11493
11494 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11495 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11496 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11497 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11498
11499 QualType Ty = E->getArg(0)->getType();
11500 llvm::Type *StoreTy =
11501 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11502
11503 if (StoreVal->getType()->isPointerTy())
11504 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11505 else {
11506 llvm::Type *IntTy = llvm::IntegerType::get(
11508 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11509 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11510 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11511 }
11512
11513 Function *F =
11514 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11515 ? Intrinsic::aarch64_stlxr
11516 : Intrinsic::aarch64_stxr,
11517 StoreAddr->getType());
11518 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11519 CI->addParamAttr(
11520 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11521 return CI;
11522 }
11523
11524 if (BuiltinID == clang::AArch64::BI__getReg) {
11526 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11527 llvm_unreachable("Sema will ensure that the parameter is constant");
11528
11529 llvm::APSInt Value = Result.Val.getInt();
11530 LLVMContext &Context = CGM.getLLVMContext();
11531 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11532
11533 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11534 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11535 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11536
11537 llvm::Function *F =
11538 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11539 return Builder.CreateCall(F, Metadata);
11540 }
11541
11542 if (BuiltinID == clang::AArch64::BI__break) {
11544 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11545 llvm_unreachable("Sema will ensure that the parameter is constant");
11546
11547 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11548 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11549 }
11550
11551 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11552 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11553 return Builder.CreateCall(F);
11554 }
11555
11556 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11557 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11558 llvm::SyncScope::SingleThread);
11559
11560 // CRC32
11561 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11562 switch (BuiltinID) {
11563 case clang::AArch64::BI__builtin_arm_crc32b:
11564 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11565 case clang::AArch64::BI__builtin_arm_crc32cb:
11566 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11567 case clang::AArch64::BI__builtin_arm_crc32h:
11568 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11569 case clang::AArch64::BI__builtin_arm_crc32ch:
11570 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11571 case clang::AArch64::BI__builtin_arm_crc32w:
11572 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11573 case clang::AArch64::BI__builtin_arm_crc32cw:
11574 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11575 case clang::AArch64::BI__builtin_arm_crc32d:
11576 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11577 case clang::AArch64::BI__builtin_arm_crc32cd:
11578 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11579 }
11580
11581 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11582 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11583 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11584 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11585
11586 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11587 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11588
11589 return Builder.CreateCall(F, {Arg0, Arg1});
11590 }
11591
11592 // Memory Operations (MOPS)
11593 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11594 Value *Dst = EmitScalarExpr(E->getArg(0));
11595 Value *Val = EmitScalarExpr(E->getArg(1));
11596 Value *Size = EmitScalarExpr(E->getArg(2));
11597 Val = Builder.CreateTrunc(Val, Int8Ty);
11598 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11599 return Builder.CreateCall(
11600 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11601 }
11602
11603 // Memory Tagging Extensions (MTE) Intrinsics
11604 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11605 switch (BuiltinID) {
11606 case clang::AArch64::BI__builtin_arm_irg:
11607 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11608 case clang::AArch64::BI__builtin_arm_addg:
11609 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11610 case clang::AArch64::BI__builtin_arm_gmi:
11611 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11612 case clang::AArch64::BI__builtin_arm_ldg:
11613 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11614 case clang::AArch64::BI__builtin_arm_stg:
11615 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11616 case clang::AArch64::BI__builtin_arm_subp:
11617 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11618 }
11619
11620 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11621 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11622 Value *Pointer = EmitScalarExpr(E->getArg(0));
11623 Value *Mask = EmitScalarExpr(E->getArg(1));
11624
11625 Mask = Builder.CreateZExt(Mask, Int64Ty);
11626 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11627 {Pointer, Mask});
11628 }
11629 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11630 Value *Pointer = EmitScalarExpr(E->getArg(0));
11631 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11632
11633 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11634 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11635 {Pointer, TagOffset});
11636 }
11637 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11638 Value *Pointer = EmitScalarExpr(E->getArg(0));
11639 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11640
11641 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11642 return Builder.CreateCall(
11643 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11644 }
11645 // Although it is possible to supply a different return
11646 // address (first arg) to this intrinsic, for now we set
11647 // return address same as input address.
11648 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11649 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11650 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11651 {TagAddress, TagAddress});
11652 }
11653 // Although it is possible to supply a different tag (to set)
11654 // to this intrinsic (as first arg), for now we supply
11655 // the tag that is in input address arg (common use case).
11656 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11657 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11658 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11659 {TagAddress, TagAddress});
11660 }
11661 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11662 Value *PointerA = EmitScalarExpr(E->getArg(0));
11663 Value *PointerB = EmitScalarExpr(E->getArg(1));
11664 return Builder.CreateCall(
11665 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11666 }
11667 }
11668
11669 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11670 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11671 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11672 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11673 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11674 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11675 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11676 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11677
11678 SpecialRegisterAccessKind AccessKind = Write;
11679 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11680 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11681 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11682 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11683 AccessKind = VolatileRead;
11684
11685 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11686 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11687
11688 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11689 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11690
11691 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11692 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11693
11694 llvm::Type *ValueType;
11695 llvm::Type *RegisterType = Int64Ty;
11696 if (Is32Bit) {
11697 ValueType = Int32Ty;
11698 } else if (Is128Bit) {
11699 llvm::Type *Int128Ty =
11700 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11701 ValueType = Int128Ty;
11702 RegisterType = Int128Ty;
11703 } else if (IsPointerBuiltin) {
11704 ValueType = VoidPtrTy;
11705 } else {
11706 ValueType = Int64Ty;
11707 };
11708
11709 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11710 AccessKind);
11711 }
11712
11713 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11714 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11715 LLVMContext &Context = CGM.getLLVMContext();
11716
11717 unsigned SysReg =
11718 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11719
11720 std::string SysRegStr;
11721 llvm::raw_string_ostream(SysRegStr) <<
11722 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11723 ((SysReg >> 11) & 7) << ":" <<
11724 ((SysReg >> 7) & 15) << ":" <<
11725 ((SysReg >> 3) & 15) << ":" <<
11726 ( SysReg & 7);
11727
11728 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11729 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11730 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11731
11732 llvm::Type *RegisterType = Int64Ty;
11733 llvm::Type *Types[] = { RegisterType };
11734
11735 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11736 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11737
11738 return Builder.CreateCall(F, Metadata);
11739 }
11740
11741 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11742 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11743
11744 return Builder.CreateCall(F, { Metadata, ArgValue });
11745 }
11746
11747 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11748 llvm::Function *F =
11749 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11750 return Builder.CreateCall(F);
11751 }
11752
11753 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11754 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11755 return Builder.CreateCall(F);
11756 }
11757
11758 if (BuiltinID == clang::AArch64::BI__mulh ||
11759 BuiltinID == clang::AArch64::BI__umulh) {
11760 llvm::Type *ResType = ConvertType(E->getType());
11761 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11762
11763 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11764 Value *LHS =
11765 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11766 Value *RHS =
11767 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11768
11769 Value *MulResult, *HigherBits;
11770 if (IsSigned) {
11771 MulResult = Builder.CreateNSWMul(LHS, RHS);
11772 HigherBits = Builder.CreateAShr(MulResult, 64);
11773 } else {
11774 MulResult = Builder.CreateNUWMul(LHS, RHS);
11775 HigherBits = Builder.CreateLShr(MulResult, 64);
11776 }
11777 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11778
11779 return HigherBits;
11780 }
11781
11782 if (BuiltinID == AArch64::BI__writex18byte ||
11783 BuiltinID == AArch64::BI__writex18word ||
11784 BuiltinID == AArch64::BI__writex18dword ||
11785 BuiltinID == AArch64::BI__writex18qword) {
11786 // Process the args first
11787 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11788 Value *DataArg = EmitScalarExpr(E->getArg(1));
11789
11790 // Read x18 as i8*
11791 llvm::Value *X18 = readX18AsPtr(*this);
11792
11793 // Store val at x18 + offset
11794 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11795 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11796 StoreInst *Store =
11797 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11798 return Store;
11799 }
11800
11801 if (BuiltinID == AArch64::BI__readx18byte ||
11802 BuiltinID == AArch64::BI__readx18word ||
11803 BuiltinID == AArch64::BI__readx18dword ||
11804 BuiltinID == AArch64::BI__readx18qword) {
11805 // Process the args first
11806 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11807
11808 // Read x18 as i8*
11809 llvm::Value *X18 = readX18AsPtr(*this);
11810
11811 // Load x18 + offset
11812 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11813 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11814 llvm::Type *IntTy = ConvertType(E->getType());
11815 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11816 return Load;
11817 }
11818
11819 if (BuiltinID == AArch64::BI__addx18byte ||
11820 BuiltinID == AArch64::BI__addx18word ||
11821 BuiltinID == AArch64::BI__addx18dword ||
11822 BuiltinID == AArch64::BI__addx18qword ||
11823 BuiltinID == AArch64::BI__incx18byte ||
11824 BuiltinID == AArch64::BI__incx18word ||
11825 BuiltinID == AArch64::BI__incx18dword ||
11826 BuiltinID == AArch64::BI__incx18qword) {
11827 llvm::Type *IntTy;
11828 bool isIncrement;
11829 switch (BuiltinID) {
11830 case AArch64::BI__incx18byte:
11831 IntTy = Int8Ty;
11832 isIncrement = true;
11833 break;
11834 case AArch64::BI__incx18word:
11835 IntTy = Int16Ty;
11836 isIncrement = true;
11837 break;
11838 case AArch64::BI__incx18dword:
11839 IntTy = Int32Ty;
11840 isIncrement = true;
11841 break;
11842 case AArch64::BI__incx18qword:
11843 IntTy = Int64Ty;
11844 isIncrement = true;
11845 break;
11846 default:
11847 IntTy = ConvertType(E->getArg(1)->getType());
11848 isIncrement = false;
11849 break;
11850 }
11851 // Process the args first
11852 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11853 Value *ValToAdd =
11854 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11855
11856 // Read x18 as i8*
11857 llvm::Value *X18 = readX18AsPtr(*this);
11858
11859 // Load x18 + offset
11860 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11861 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11862 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11863
11864 // Add values
11865 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
11866
11867 // Store val at x18 + offset
11868 StoreInst *Store =
11869 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
11870 return Store;
11871 }
11872
11873 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11874 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11875 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11876 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11877 Value *Arg = EmitScalarExpr(E->getArg(0));
11878 llvm::Type *RetTy = ConvertType(E->getType());
11879 return Builder.CreateBitCast(Arg, RetTy);
11880 }
11881
11882 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11883 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11884 BuiltinID == AArch64::BI_CountLeadingZeros ||
11885 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11886 Value *Arg = EmitScalarExpr(E->getArg(0));
11887 llvm::Type *ArgType = Arg->getType();
11888
11889 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11890 BuiltinID == AArch64::BI_CountLeadingOnes64)
11891 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11892
11893 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11894 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11895
11896 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11897 BuiltinID == AArch64::BI_CountLeadingZeros64)
11898 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11899 return Result;
11900 }
11901
11902 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11903 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11904 Value *Arg = EmitScalarExpr(E->getArg(0));
11905
11906 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11907 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11908 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11909
11910 Value *Result = Builder.CreateCall(F, Arg, "cls");
11911 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11912 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11913 return Result;
11914 }
11915
11916 if (BuiltinID == AArch64::BI_CountOneBits ||
11917 BuiltinID == AArch64::BI_CountOneBits64) {
11918 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11919 llvm::Type *ArgType = ArgValue->getType();
11920 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11921
11922 Value *Result = Builder.CreateCall(F, ArgValue);
11923 if (BuiltinID == AArch64::BI_CountOneBits64)
11924 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11925 return Result;
11926 }
11927
11928 if (BuiltinID == AArch64::BI__prefetch) {
11929 Value *Address = EmitScalarExpr(E->getArg(0));
11930 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11931 Value *Locality = ConstantInt::get(Int32Ty, 3);
11932 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11933 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11934 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11935 }
11936
11937 if (BuiltinID == AArch64::BI__hlt) {
11938 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11939 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11940
11941 // Return 0 for convenience, even though MSVC returns some other undefined
11942 // value.
11943 return ConstantInt::get(Builder.getInt32Ty(), 0);
11944 }
11945
11946 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
11947 return Builder.CreateFPTrunc(
11948 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
11949 Builder.getFloatTy()),
11950 Builder.getBFloatTy());
11951
11952 // Handle MSVC intrinsics before argument evaluation to prevent double
11953 // evaluation.
11954 if (std::optional<MSVCIntrin> MsvcIntId =
11956 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11957
11958 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
11959 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11960 return P.first == BuiltinID;
11961 });
11962 if (It != end(NEONEquivalentIntrinsicMap))
11963 BuiltinID = It->second;
11964
11965 // Find out if any arguments are required to be integer constant
11966 // expressions.
11967 unsigned ICEArguments = 0;
11969 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11970 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11971
11973 Address PtrOp0 = Address::invalid();
11974 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11975 if (i == 0) {
11976 switch (BuiltinID) {
11977 case NEON::BI__builtin_neon_vld1_v:
11978 case NEON::BI__builtin_neon_vld1q_v:
11979 case NEON::BI__builtin_neon_vld1_dup_v:
11980 case NEON::BI__builtin_neon_vld1q_dup_v:
11981 case NEON::BI__builtin_neon_vld1_lane_v:
11982 case NEON::BI__builtin_neon_vld1q_lane_v:
11983 case NEON::BI__builtin_neon_vst1_v:
11984 case NEON::BI__builtin_neon_vst1q_v:
11985 case NEON::BI__builtin_neon_vst1_lane_v:
11986 case NEON::BI__builtin_neon_vst1q_lane_v:
11987 case NEON::BI__builtin_neon_vldap1_lane_s64:
11988 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11989 case NEON::BI__builtin_neon_vstl1_lane_s64:
11990 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11991 // Get the alignment for the argument in addition to the value;
11992 // we'll use it later.
11993 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11994 Ops.push_back(PtrOp0.emitRawPointer(*this));
11995 continue;
11996 }
11997 }
11998 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11999 }
12000
12001 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12002 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12003 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12004
12005 if (Builtin) {
12006 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12007 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12008 assert(Result && "SISD intrinsic should have been handled");
12009 return Result;
12010 }
12011
12012 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12014 if (std::optional<llvm::APSInt> Result =
12016 // Determine the type of this overloaded NEON intrinsic.
12017 Type = NeonTypeFlags(Result->getZExtValue());
12018
12019 bool usgn = Type.isUnsigned();
12020 bool quad = Type.isQuad();
12021
12022 // Handle non-overloaded intrinsics first.
12023 switch (BuiltinID) {
12024 default: break;
12025 case NEON::BI__builtin_neon_vabsh_f16:
12026 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12027 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12028 case NEON::BI__builtin_neon_vaddq_p128: {
12029 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12030 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12031 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12032 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12033 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12034 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12035 return Builder.CreateBitCast(Ops[0], Int128Ty);
12036 }
12037 case NEON::BI__builtin_neon_vldrq_p128: {
12038 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12039 Value *Ptr = EmitScalarExpr(E->getArg(0));
12040 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12042 }
12043 case NEON::BI__builtin_neon_vstrq_p128: {
12044 Value *Ptr = Ops[0];
12045 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12046 }
12047 case NEON::BI__builtin_neon_vcvts_f32_u32:
12048 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12049 usgn = true;
12050 [[fallthrough]];
12051 case NEON::BI__builtin_neon_vcvts_f32_s32:
12052 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12053 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12054 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12055 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12056 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12057 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12058 if (usgn)
12059 return Builder.CreateUIToFP(Ops[0], FTy);
12060 return Builder.CreateSIToFP(Ops[0], FTy);
12061 }
12062 case NEON::BI__builtin_neon_vcvth_f16_u16:
12063 case NEON::BI__builtin_neon_vcvth_f16_u32:
12064 case NEON::BI__builtin_neon_vcvth_f16_u64:
12065 usgn = true;
12066 [[fallthrough]];
12067 case NEON::BI__builtin_neon_vcvth_f16_s16:
12068 case NEON::BI__builtin_neon_vcvth_f16_s32:
12069 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12070 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12071 llvm::Type *FTy = HalfTy;
12072 llvm::Type *InTy;
12073 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12074 InTy = Int64Ty;
12075 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12076 InTy = Int32Ty;
12077 else
12078 InTy = Int16Ty;
12079 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12080 if (usgn)
12081 return Builder.CreateUIToFP(Ops[0], FTy);
12082 return Builder.CreateSIToFP(Ops[0], FTy);
12083 }
12084 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12085 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12086 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12087 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12088 case NEON::BI__builtin_neon_vcvth_u16_f16:
12089 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12090 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12091 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12092 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12093 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12094 unsigned Int;
12095 llvm::Type* InTy = Int32Ty;
12096 llvm::Type* FTy = HalfTy;
12097 llvm::Type *Tys[2] = {InTy, FTy};
12098 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12099 switch (BuiltinID) {
12100 default: llvm_unreachable("missing builtin ID in switch!");
12101 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12102 Int = Intrinsic::aarch64_neon_fcvtau; break;
12103 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12104 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12105 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12106 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12107 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12108 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12109 case NEON::BI__builtin_neon_vcvth_u16_f16:
12110 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12111 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12112 Int = Intrinsic::aarch64_neon_fcvtas; break;
12113 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12114 Int = Intrinsic::aarch64_neon_fcvtms; break;
12115 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12116 Int = Intrinsic::aarch64_neon_fcvtns; break;
12117 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12118 Int = Intrinsic::aarch64_neon_fcvtps; break;
12119 case NEON::BI__builtin_neon_vcvth_s16_f16:
12120 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12121 }
12122 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12123 return Builder.CreateTrunc(Ops[0], Int16Ty);
12124 }
12125 case NEON::BI__builtin_neon_vcaleh_f16:
12126 case NEON::BI__builtin_neon_vcalth_f16:
12127 case NEON::BI__builtin_neon_vcageh_f16:
12128 case NEON::BI__builtin_neon_vcagth_f16: {
12129 unsigned Int;
12130 llvm::Type* InTy = Int32Ty;
12131 llvm::Type* FTy = HalfTy;
12132 llvm::Type *Tys[2] = {InTy, FTy};
12133 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12134 switch (BuiltinID) {
12135 default: llvm_unreachable("missing builtin ID in switch!");
12136 case NEON::BI__builtin_neon_vcageh_f16:
12137 Int = Intrinsic::aarch64_neon_facge; break;
12138 case NEON::BI__builtin_neon_vcagth_f16:
12139 Int = Intrinsic::aarch64_neon_facgt; break;
12140 case NEON::BI__builtin_neon_vcaleh_f16:
12141 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12142 case NEON::BI__builtin_neon_vcalth_f16:
12143 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12144 }
12145 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12146 return Builder.CreateTrunc(Ops[0], Int16Ty);
12147 }
12148 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12149 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12150 unsigned Int;
12151 llvm::Type* InTy = Int32Ty;
12152 llvm::Type* FTy = HalfTy;
12153 llvm::Type *Tys[2] = {InTy, FTy};
12154 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12155 switch (BuiltinID) {
12156 default: llvm_unreachable("missing builtin ID in switch!");
12157 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12158 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12159 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12160 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12161 }
12162 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12163 return Builder.CreateTrunc(Ops[0], Int16Ty);
12164 }
12165 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12166 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12167 unsigned Int;
12168 llvm::Type* FTy = HalfTy;
12169 llvm::Type* InTy = Int32Ty;
12170 llvm::Type *Tys[2] = {FTy, InTy};
12171 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12172 switch (BuiltinID) {
12173 default: llvm_unreachable("missing builtin ID in switch!");
12174 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12175 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12176 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12177 break;
12178 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12179 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12180 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12181 break;
12182 }
12183 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12184 }
12185 case NEON::BI__builtin_neon_vpaddd_s64: {
12186 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12187 Value *Vec = EmitScalarExpr(E->getArg(0));
12188 // The vector is v2i64, so make sure it's bitcast to that.
12189 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12190 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12191 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12192 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12193 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12194 // Pairwise addition of a v2i64 into a scalar i64.
12195 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12196 }
12197 case NEON::BI__builtin_neon_vpaddd_f64: {
12198 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12199 Value *Vec = EmitScalarExpr(E->getArg(0));
12200 // The vector is v2f64, so make sure it's bitcast to that.
12201 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12202 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12203 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12204 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12205 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12206 // Pairwise addition of a v2f64 into a scalar f64.
12207 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12208 }
12209 case NEON::BI__builtin_neon_vpadds_f32: {
12210 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12211 Value *Vec = EmitScalarExpr(E->getArg(0));
12212 // The vector is v2f32, so make sure it's bitcast to that.
12213 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12214 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12215 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12216 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12217 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12218 // Pairwise addition of a v2f32 into a scalar f32.
12219 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12220 }
12221 case NEON::BI__builtin_neon_vceqzd_s64:
12222 case NEON::BI__builtin_neon_vceqzd_f64:
12223 case NEON::BI__builtin_neon_vceqzs_f32:
12224 case NEON::BI__builtin_neon_vceqzh_f16:
12225 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12227 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12228 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12229 case NEON::BI__builtin_neon_vcgezd_s64:
12230 case NEON::BI__builtin_neon_vcgezd_f64:
12231 case NEON::BI__builtin_neon_vcgezs_f32:
12232 case NEON::BI__builtin_neon_vcgezh_f16:
12233 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12235 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12236 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12237 case NEON::BI__builtin_neon_vclezd_s64:
12238 case NEON::BI__builtin_neon_vclezd_f64:
12239 case NEON::BI__builtin_neon_vclezs_f32:
12240 case NEON::BI__builtin_neon_vclezh_f16:
12241 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12243 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12244 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12245 case NEON::BI__builtin_neon_vcgtzd_s64:
12246 case NEON::BI__builtin_neon_vcgtzd_f64:
12247 case NEON::BI__builtin_neon_vcgtzs_f32:
12248 case NEON::BI__builtin_neon_vcgtzh_f16:
12249 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12251 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12252 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12253 case NEON::BI__builtin_neon_vcltzd_s64:
12254 case NEON::BI__builtin_neon_vcltzd_f64:
12255 case NEON::BI__builtin_neon_vcltzs_f32:
12256 case NEON::BI__builtin_neon_vcltzh_f16:
12257 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12259 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12260 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12261
12262 case NEON::BI__builtin_neon_vceqzd_u64: {
12263 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12264 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12265 Ops[0] =
12266 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12267 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12268 }
12269 case NEON::BI__builtin_neon_vceqd_f64:
12270 case NEON::BI__builtin_neon_vcled_f64:
12271 case NEON::BI__builtin_neon_vcltd_f64:
12272 case NEON::BI__builtin_neon_vcged_f64:
12273 case NEON::BI__builtin_neon_vcgtd_f64: {
12274 llvm::CmpInst::Predicate P;
12275 switch (BuiltinID) {
12276 default: llvm_unreachable("missing builtin ID in switch!");
12277 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12278 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12279 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12280 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12281 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12282 }
12283 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12284 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12285 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12286 if (P == llvm::FCmpInst::FCMP_OEQ)
12287 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12288 else
12289 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12290 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12291 }
12292 case NEON::BI__builtin_neon_vceqs_f32:
12293 case NEON::BI__builtin_neon_vcles_f32:
12294 case NEON::BI__builtin_neon_vclts_f32:
12295 case NEON::BI__builtin_neon_vcges_f32:
12296 case NEON::BI__builtin_neon_vcgts_f32: {
12297 llvm::CmpInst::Predicate P;
12298 switch (BuiltinID) {
12299 default: llvm_unreachable("missing builtin ID in switch!");
12300 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12301 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12302 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12303 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12304 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12305 }
12306 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12307 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12308 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12309 if (P == llvm::FCmpInst::FCMP_OEQ)
12310 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12311 else
12312 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12313 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12314 }
12315 case NEON::BI__builtin_neon_vceqh_f16:
12316 case NEON::BI__builtin_neon_vcleh_f16:
12317 case NEON::BI__builtin_neon_vclth_f16:
12318 case NEON::BI__builtin_neon_vcgeh_f16:
12319 case NEON::BI__builtin_neon_vcgth_f16: {
12320 llvm::CmpInst::Predicate P;
12321 switch (BuiltinID) {
12322 default: llvm_unreachable("missing builtin ID in switch!");
12323 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12324 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12325 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12326 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12327 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12328 }
12329 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12330 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12331 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12332 if (P == llvm::FCmpInst::FCMP_OEQ)
12333 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12334 else
12335 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12336 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12337 }
12338 case NEON::BI__builtin_neon_vceqd_s64:
12339 case NEON::BI__builtin_neon_vceqd_u64:
12340 case NEON::BI__builtin_neon_vcgtd_s64:
12341 case NEON::BI__builtin_neon_vcgtd_u64:
12342 case NEON::BI__builtin_neon_vcltd_s64:
12343 case NEON::BI__builtin_neon_vcltd_u64:
12344 case NEON::BI__builtin_neon_vcged_u64:
12345 case NEON::BI__builtin_neon_vcged_s64:
12346 case NEON::BI__builtin_neon_vcled_u64:
12347 case NEON::BI__builtin_neon_vcled_s64: {
12348 llvm::CmpInst::Predicate P;
12349 switch (BuiltinID) {
12350 default: llvm_unreachable("missing builtin ID in switch!");
12351 case NEON::BI__builtin_neon_vceqd_s64:
12352 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12353 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12354 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12355 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12356 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12357 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12358 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12359 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12360 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12361 }
12362 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12363 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12364 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12365 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12366 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12367 }
12368 case NEON::BI__builtin_neon_vtstd_s64:
12369 case NEON::BI__builtin_neon_vtstd_u64: {
12370 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12371 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12372 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12373 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12374 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12375 llvm::Constant::getNullValue(Int64Ty));
12376 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12377 }
12378 case NEON::BI__builtin_neon_vset_lane_i8:
12379 case NEON::BI__builtin_neon_vset_lane_i16:
12380 case NEON::BI__builtin_neon_vset_lane_i32:
12381 case NEON::BI__builtin_neon_vset_lane_i64:
12382 case NEON::BI__builtin_neon_vset_lane_bf16:
12383 case NEON::BI__builtin_neon_vset_lane_f32:
12384 case NEON::BI__builtin_neon_vsetq_lane_i8:
12385 case NEON::BI__builtin_neon_vsetq_lane_i16:
12386 case NEON::BI__builtin_neon_vsetq_lane_i32:
12387 case NEON::BI__builtin_neon_vsetq_lane_i64:
12388 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12389 case NEON::BI__builtin_neon_vsetq_lane_f32:
12390 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12391 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12392 case NEON::BI__builtin_neon_vset_lane_f64:
12393 // The vector type needs a cast for the v1f64 variant.
12394 Ops[1] =
12395 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12396 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12397 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12398 case NEON::BI__builtin_neon_vsetq_lane_f64:
12399 // The vector type needs a cast for the v2f64 variant.
12400 Ops[1] =
12401 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12402 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12403 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12404
12405 case NEON::BI__builtin_neon_vget_lane_i8:
12406 case NEON::BI__builtin_neon_vdupb_lane_i8:
12407 Ops[0] =
12408 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12409 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12410 "vget_lane");
12411 case NEON::BI__builtin_neon_vgetq_lane_i8:
12412 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12413 Ops[0] =
12414 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12415 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12416 "vgetq_lane");
12417 case NEON::BI__builtin_neon_vget_lane_i16:
12418 case NEON::BI__builtin_neon_vduph_lane_i16:
12419 Ops[0] =
12420 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12421 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12422 "vget_lane");
12423 case NEON::BI__builtin_neon_vgetq_lane_i16:
12424 case NEON::BI__builtin_neon_vduph_laneq_i16:
12425 Ops[0] =
12426 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12427 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12428 "vgetq_lane");
12429 case NEON::BI__builtin_neon_vget_lane_i32:
12430 case NEON::BI__builtin_neon_vdups_lane_i32:
12431 Ops[0] =
12432 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12433 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12434 "vget_lane");
12435 case NEON::BI__builtin_neon_vdups_lane_f32:
12436 Ops[0] =
12437 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12438 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12439 "vdups_lane");
12440 case NEON::BI__builtin_neon_vgetq_lane_i32:
12441 case NEON::BI__builtin_neon_vdups_laneq_i32:
12442 Ops[0] =
12443 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12444 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12445 "vgetq_lane");
12446 case NEON::BI__builtin_neon_vget_lane_i64:
12447 case NEON::BI__builtin_neon_vdupd_lane_i64:
12448 Ops[0] =
12449 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12450 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12451 "vget_lane");
12452 case NEON::BI__builtin_neon_vdupd_lane_f64:
12453 Ops[0] =
12454 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12455 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12456 "vdupd_lane");
12457 case NEON::BI__builtin_neon_vgetq_lane_i64:
12458 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12459 Ops[0] =
12460 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12461 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12462 "vgetq_lane");
12463 case NEON::BI__builtin_neon_vget_lane_f32:
12464 Ops[0] =
12465 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12466 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12467 "vget_lane");
12468 case NEON::BI__builtin_neon_vget_lane_f64:
12469 Ops[0] =
12470 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12471 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12472 "vget_lane");
12473 case NEON::BI__builtin_neon_vgetq_lane_f32:
12474 case NEON::BI__builtin_neon_vdups_laneq_f32:
12475 Ops[0] =
12476 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12477 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12478 "vgetq_lane");
12479 case NEON::BI__builtin_neon_vgetq_lane_f64:
12480 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12481 Ops[0] =
12482 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12483 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12484 "vgetq_lane");
12485 case NEON::BI__builtin_neon_vaddh_f16:
12486 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12487 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12488 case NEON::BI__builtin_neon_vsubh_f16:
12489 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12490 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12491 case NEON::BI__builtin_neon_vmulh_f16:
12492 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12493 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12494 case NEON::BI__builtin_neon_vdivh_f16:
12495 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12496 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12497 case NEON::BI__builtin_neon_vfmah_f16:
12498 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12500 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12501 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12502 case NEON::BI__builtin_neon_vfmsh_f16: {
12503 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12504
12505 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12507 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12508 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12509 }
12510 case NEON::BI__builtin_neon_vaddd_s64:
12511 case NEON::BI__builtin_neon_vaddd_u64:
12512 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12513 case NEON::BI__builtin_neon_vsubd_s64:
12514 case NEON::BI__builtin_neon_vsubd_u64:
12515 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12516 case NEON::BI__builtin_neon_vqdmlalh_s16:
12517 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12518 SmallVector<Value *, 2> ProductOps;
12519 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12520 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12521 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12522 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12523 ProductOps, "vqdmlXl");
12524 Constant *CI = ConstantInt::get(SizeTy, 0);
12525 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12526
12527 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12528 ? Intrinsic::aarch64_neon_sqadd
12529 : Intrinsic::aarch64_neon_sqsub;
12530 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12531 }
12532 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12533 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12534 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12535 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12536 Ops, "vqshlu_n");
12537 }
12538 case NEON::BI__builtin_neon_vqshld_n_u64:
12539 case NEON::BI__builtin_neon_vqshld_n_s64: {
12540 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12541 ? Intrinsic::aarch64_neon_uqshl
12542 : Intrinsic::aarch64_neon_sqshl;
12543 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12544 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12545 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12546 }
12547 case NEON::BI__builtin_neon_vrshrd_n_u64:
12548 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12549 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12550 ? Intrinsic::aarch64_neon_urshl
12551 : Intrinsic::aarch64_neon_srshl;
12552 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12553 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12554 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12555 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12556 }
12557 case NEON::BI__builtin_neon_vrsrad_n_u64:
12558 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12559 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12560 ? Intrinsic::aarch64_neon_urshl
12561 : Intrinsic::aarch64_neon_srshl;
12562 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12563 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12564 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12565 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12566 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12567 }
12568 case NEON::BI__builtin_neon_vshld_n_s64:
12569 case NEON::BI__builtin_neon_vshld_n_u64: {
12570 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12571 return Builder.CreateShl(
12572 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12573 }
12574 case NEON::BI__builtin_neon_vshrd_n_s64: {
12575 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12576 return Builder.CreateAShr(
12577 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12578 Amt->getZExtValue())),
12579 "shrd_n");
12580 }
12581 case NEON::BI__builtin_neon_vshrd_n_u64: {
12582 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12583 uint64_t ShiftAmt = Amt->getZExtValue();
12584 // Right-shifting an unsigned value by its size yields 0.
12585 if (ShiftAmt == 64)
12586 return ConstantInt::get(Int64Ty, 0);
12587 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12588 "shrd_n");
12589 }
12590 case NEON::BI__builtin_neon_vsrad_n_s64: {
12591 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12592 Ops[1] = Builder.CreateAShr(
12593 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12594 Amt->getZExtValue())),
12595 "shrd_n");
12596 return Builder.CreateAdd(Ops[0], Ops[1]);
12597 }
12598 case NEON::BI__builtin_neon_vsrad_n_u64: {
12599 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12600 uint64_t ShiftAmt = Amt->getZExtValue();
12601 // Right-shifting an unsigned value by its size yields 0.
12602 // As Op + 0 = Op, return Ops[0] directly.
12603 if (ShiftAmt == 64)
12604 return Ops[0];
12605 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12606 "shrd_n");
12607 return Builder.CreateAdd(Ops[0], Ops[1]);
12608 }
12609 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12610 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12611 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12612 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12613 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12614 "lane");
12615 SmallVector<Value *, 2> ProductOps;
12616 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12617 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12618 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12619 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12620 ProductOps, "vqdmlXl");
12621 Constant *CI = ConstantInt::get(SizeTy, 0);
12622 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12623 Ops.pop_back();
12624
12625 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12626 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12627 ? Intrinsic::aarch64_neon_sqadd
12628 : Intrinsic::aarch64_neon_sqsub;
12629 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12630 }
12631 case NEON::BI__builtin_neon_vqdmlals_s32:
12632 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12633 SmallVector<Value *, 2> ProductOps;
12634 ProductOps.push_back(Ops[1]);
12635 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12636 Ops[1] =
12637 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12638 ProductOps, "vqdmlXl");
12639
12640 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12641 ? Intrinsic::aarch64_neon_sqadd
12642 : Intrinsic::aarch64_neon_sqsub;
12643 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12644 }
12645 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12646 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12647 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12648 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12649 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12650 "lane");
12651 SmallVector<Value *, 2> ProductOps;
12652 ProductOps.push_back(Ops[1]);
12653 ProductOps.push_back(Ops[2]);
12654 Ops[1] =
12655 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12656 ProductOps, "vqdmlXl");
12657 Ops.pop_back();
12658
12659 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12660 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12661 ? Intrinsic::aarch64_neon_sqadd
12662 : Intrinsic::aarch64_neon_sqsub;
12663 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12664 }
12665 case NEON::BI__builtin_neon_vget_lane_bf16:
12666 case NEON::BI__builtin_neon_vduph_lane_bf16:
12667 case NEON::BI__builtin_neon_vduph_lane_f16: {
12668 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12669 "vget_lane");
12670 }
12671 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12672 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12673 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12674 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12675 "vgetq_lane");
12676 }
12677 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12678 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12679 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12680 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12681 }
12682 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12683 SmallVector<int, 16> ConcatMask(8);
12684 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12685 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12686 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12687 llvm::Value *Trunc =
12688 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12689 return Builder.CreateShuffleVector(
12690 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12691 }
12692 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12693 SmallVector<int, 16> ConcatMask(8);
12694 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12695 SmallVector<int, 16> LoMask(4);
12696 std::iota(LoMask.begin(), LoMask.end(), 0);
12697 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12698 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12699 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
12700 llvm::Value *Inactive = Builder.CreateShuffleVector(
12701 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12702 llvm::Value *Trunc =
12703 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12704 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12705 }
12706
12707 case clang::AArch64::BI_InterlockedAdd:
12708 case clang::AArch64::BI_InterlockedAdd64: {
12709 Address DestAddr = CheckAtomicAlignment(*this, E);
12710 Value *Val = EmitScalarExpr(E->getArg(1));
12711 AtomicRMWInst *RMWI =
12712 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12713 llvm::AtomicOrdering::SequentiallyConsistent);
12714 return Builder.CreateAdd(RMWI, Val);
12715 }
12716 }
12717
12718 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12719 llvm::Type *Ty = VTy;
12720 if (!Ty)
12721 return nullptr;
12722
12723 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12724 // defer to common code if it's been added to our special map.
12727
12728 if (Builtin)
12730 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12731 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12732 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12733
12734 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12735 return V;
12736
12737 unsigned Int;
12738 switch (BuiltinID) {
12739 default: return nullptr;
12740 case NEON::BI__builtin_neon_vbsl_v:
12741 case NEON::BI__builtin_neon_vbslq_v: {
12742 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12743 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12744 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12745 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12746
12747 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12748 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12749 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12750 return Builder.CreateBitCast(Ops[0], Ty);
12751 }
12752 case NEON::BI__builtin_neon_vfma_lane_v:
12753 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12754 // The ARM builtins (and instructions) have the addend as the first
12755 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12756 Value *Addend = Ops[0];
12757 Value *Multiplicand = Ops[1];
12758 Value *LaneSource = Ops[2];
12759 Ops[0] = Multiplicand;
12760 Ops[1] = LaneSource;
12761 Ops[2] = Addend;
12762
12763 // Now adjust things to handle the lane access.
12764 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12765 ? llvm::FixedVectorType::get(VTy->getElementType(),
12766 VTy->getNumElements() / 2)
12767 : VTy;
12768 llvm::Constant *cst = cast<Constant>(Ops[3]);
12769 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12770 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12771 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12772
12773 Ops.pop_back();
12774 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12775 : Intrinsic::fma;
12776 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12777 }
12778 case NEON::BI__builtin_neon_vfma_laneq_v: {
12779 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12780 // v1f64 fma should be mapped to Neon scalar f64 fma
12781 if (VTy && VTy->getElementType() == DoubleTy) {
12782 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12783 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12784 llvm::FixedVectorType *VTy =
12786 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12787 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12788 Value *Result;
12790 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12791 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12792 return Builder.CreateBitCast(Result, Ty);
12793 }
12794 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12795 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12796
12797 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12798 VTy->getNumElements() * 2);
12799 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12800 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12801 cast<ConstantInt>(Ops[3]));
12802 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12803
12805 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12806 {Ops[2], Ops[1], Ops[0]});
12807 }
12808 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12809 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12810 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12811
12812 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12813 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12815 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12816 {Ops[2], Ops[1], Ops[0]});
12817 }
12818 case NEON::BI__builtin_neon_vfmah_lane_f16:
12819 case NEON::BI__builtin_neon_vfmas_lane_f32:
12820 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12821 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12822 case NEON::BI__builtin_neon_vfmad_lane_f64:
12823 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12824 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12825 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12826 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12828 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12829 {Ops[1], Ops[2], Ops[0]});
12830 }
12831 case NEON::BI__builtin_neon_vmull_v:
12832 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12833 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12834 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12835 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12836 case NEON::BI__builtin_neon_vmax_v:
12837 case NEON::BI__builtin_neon_vmaxq_v:
12838 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12839 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12840 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12841 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12842 case NEON::BI__builtin_neon_vmaxh_f16: {
12843 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12844 Int = Intrinsic::aarch64_neon_fmax;
12845 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12846 }
12847 case NEON::BI__builtin_neon_vmin_v:
12848 case NEON::BI__builtin_neon_vminq_v:
12849 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12850 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12851 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12852 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12853 case NEON::BI__builtin_neon_vminh_f16: {
12854 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12855 Int = Intrinsic::aarch64_neon_fmin;
12856 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12857 }
12858 case NEON::BI__builtin_neon_vabd_v:
12859 case NEON::BI__builtin_neon_vabdq_v:
12860 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12861 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12862 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12863 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12864 case NEON::BI__builtin_neon_vpadal_v:
12865 case NEON::BI__builtin_neon_vpadalq_v: {
12866 unsigned ArgElts = VTy->getNumElements();
12867 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12868 unsigned BitWidth = EltTy->getBitWidth();
12869 auto *ArgTy = llvm::FixedVectorType::get(
12870 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12871 llvm::Type* Tys[2] = { VTy, ArgTy };
12872 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12874 TmpOps.push_back(Ops[1]);
12875 Function *F = CGM.getIntrinsic(Int, Tys);
12876 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12877 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12878 return Builder.CreateAdd(tmp, addend);
12879 }
12880 case NEON::BI__builtin_neon_vpmin_v:
12881 case NEON::BI__builtin_neon_vpminq_v:
12882 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12883 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12884 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12885 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12886 case NEON::BI__builtin_neon_vpmax_v:
12887 case NEON::BI__builtin_neon_vpmaxq_v:
12888 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12889 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12890 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12891 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12892 case NEON::BI__builtin_neon_vminnm_v:
12893 case NEON::BI__builtin_neon_vminnmq_v:
12894 Int = Intrinsic::aarch64_neon_fminnm;
12895 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12896 case NEON::BI__builtin_neon_vminnmh_f16:
12897 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12898 Int = Intrinsic::aarch64_neon_fminnm;
12899 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12900 case NEON::BI__builtin_neon_vmaxnm_v:
12901 case NEON::BI__builtin_neon_vmaxnmq_v:
12902 Int = Intrinsic::aarch64_neon_fmaxnm;
12903 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12904 case NEON::BI__builtin_neon_vmaxnmh_f16:
12905 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12906 Int = Intrinsic::aarch64_neon_fmaxnm;
12907 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12908 case NEON::BI__builtin_neon_vrecpss_f32: {
12909 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12910 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12911 Ops, "vrecps");
12912 }
12913 case NEON::BI__builtin_neon_vrecpsd_f64:
12914 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12915 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12916 Ops, "vrecps");
12917 case NEON::BI__builtin_neon_vrecpsh_f16:
12918 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12919 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12920 Ops, "vrecps");
12921 case NEON::BI__builtin_neon_vqshrun_n_v:
12922 Int = Intrinsic::aarch64_neon_sqshrun;
12923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12924 case NEON::BI__builtin_neon_vqrshrun_n_v:
12925 Int = Intrinsic::aarch64_neon_sqrshrun;
12926 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12927 case NEON::BI__builtin_neon_vqshrn_n_v:
12928 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12929 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12930 case NEON::BI__builtin_neon_vrshrn_n_v:
12931 Int = Intrinsic::aarch64_neon_rshrn;
12932 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12933 case NEON::BI__builtin_neon_vqrshrn_n_v:
12934 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12936 case NEON::BI__builtin_neon_vrndah_f16: {
12937 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12938 Int = Builder.getIsFPConstrained()
12939 ? Intrinsic::experimental_constrained_round
12940 : Intrinsic::round;
12941 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12942 }
12943 case NEON::BI__builtin_neon_vrnda_v:
12944 case NEON::BI__builtin_neon_vrndaq_v: {
12945 Int = Builder.getIsFPConstrained()
12946 ? Intrinsic::experimental_constrained_round
12947 : Intrinsic::round;
12948 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12949 }
12950 case NEON::BI__builtin_neon_vrndih_f16: {
12951 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12952 Int = Builder.getIsFPConstrained()
12953 ? Intrinsic::experimental_constrained_nearbyint
12954 : Intrinsic::nearbyint;
12955 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12956 }
12957 case NEON::BI__builtin_neon_vrndmh_f16: {
12958 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12959 Int = Builder.getIsFPConstrained()
12960 ? Intrinsic::experimental_constrained_floor
12961 : Intrinsic::floor;
12962 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12963 }
12964 case NEON::BI__builtin_neon_vrndm_v:
12965 case NEON::BI__builtin_neon_vrndmq_v: {
12966 Int = Builder.getIsFPConstrained()
12967 ? Intrinsic::experimental_constrained_floor
12968 : Intrinsic::floor;
12969 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12970 }
12971 case NEON::BI__builtin_neon_vrndnh_f16: {
12972 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12973 Int = Builder.getIsFPConstrained()
12974 ? Intrinsic::experimental_constrained_roundeven
12975 : Intrinsic::roundeven;
12976 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12977 }
12978 case NEON::BI__builtin_neon_vrndn_v:
12979 case NEON::BI__builtin_neon_vrndnq_v: {
12980 Int = Builder.getIsFPConstrained()
12981 ? Intrinsic::experimental_constrained_roundeven
12982 : Intrinsic::roundeven;
12983 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12984 }
12985 case NEON::BI__builtin_neon_vrndns_f32: {
12986 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12987 Int = Builder.getIsFPConstrained()
12988 ? Intrinsic::experimental_constrained_roundeven
12989 : Intrinsic::roundeven;
12990 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12991 }
12992 case NEON::BI__builtin_neon_vrndph_f16: {
12993 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12994 Int = Builder.getIsFPConstrained()
12995 ? Intrinsic::experimental_constrained_ceil
12996 : Intrinsic::ceil;
12997 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12998 }
12999 case NEON::BI__builtin_neon_vrndp_v:
13000 case NEON::BI__builtin_neon_vrndpq_v: {
13001 Int = Builder.getIsFPConstrained()
13002 ? Intrinsic::experimental_constrained_ceil
13003 : Intrinsic::ceil;
13004 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13005 }
13006 case NEON::BI__builtin_neon_vrndxh_f16: {
13007 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13008 Int = Builder.getIsFPConstrained()
13009 ? Intrinsic::experimental_constrained_rint
13010 : Intrinsic::rint;
13011 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13012 }
13013 case NEON::BI__builtin_neon_vrndx_v:
13014 case NEON::BI__builtin_neon_vrndxq_v: {
13015 Int = Builder.getIsFPConstrained()
13016 ? Intrinsic::experimental_constrained_rint
13017 : Intrinsic::rint;
13018 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13019 }
13020 case NEON::BI__builtin_neon_vrndh_f16: {
13021 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13022 Int = Builder.getIsFPConstrained()
13023 ? Intrinsic::experimental_constrained_trunc
13024 : Intrinsic::trunc;
13025 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13026 }
13027 case NEON::BI__builtin_neon_vrnd32x_f32:
13028 case NEON::BI__builtin_neon_vrnd32xq_f32:
13029 case NEON::BI__builtin_neon_vrnd32x_f64:
13030 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13031 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13032 Int = Intrinsic::aarch64_neon_frint32x;
13033 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13034 }
13035 case NEON::BI__builtin_neon_vrnd32z_f32:
13036 case NEON::BI__builtin_neon_vrnd32zq_f32:
13037 case NEON::BI__builtin_neon_vrnd32z_f64:
13038 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13039 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13040 Int = Intrinsic::aarch64_neon_frint32z;
13041 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13042 }
13043 case NEON::BI__builtin_neon_vrnd64x_f32:
13044 case NEON::BI__builtin_neon_vrnd64xq_f32:
13045 case NEON::BI__builtin_neon_vrnd64x_f64:
13046 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13047 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13048 Int = Intrinsic::aarch64_neon_frint64x;
13049 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13050 }
13051 case NEON::BI__builtin_neon_vrnd64z_f32:
13052 case NEON::BI__builtin_neon_vrnd64zq_f32:
13053 case NEON::BI__builtin_neon_vrnd64z_f64:
13054 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13055 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13056 Int = Intrinsic::aarch64_neon_frint64z;
13057 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13058 }
13059 case NEON::BI__builtin_neon_vrnd_v:
13060 case NEON::BI__builtin_neon_vrndq_v: {
13061 Int = Builder.getIsFPConstrained()
13062 ? Intrinsic::experimental_constrained_trunc
13063 : Intrinsic::trunc;
13064 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13065 }
13066 case NEON::BI__builtin_neon_vcvt_f64_v:
13067 case NEON::BI__builtin_neon_vcvtq_f64_v:
13068 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13069 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13070 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13071 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13072 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13073 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13074 "unexpected vcvt_f64_f32 builtin");
13075 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13076 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13077
13078 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13079 }
13080 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13081 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13082 "unexpected vcvt_f32_f64 builtin");
13083 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13084 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13085
13086 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13087 }
13088 case NEON::BI__builtin_neon_vcvt_s32_v:
13089 case NEON::BI__builtin_neon_vcvt_u32_v:
13090 case NEON::BI__builtin_neon_vcvt_s64_v:
13091 case NEON::BI__builtin_neon_vcvt_u64_v:
13092 case NEON::BI__builtin_neon_vcvt_s16_f16:
13093 case NEON::BI__builtin_neon_vcvt_u16_f16:
13094 case NEON::BI__builtin_neon_vcvtq_s32_v:
13095 case NEON::BI__builtin_neon_vcvtq_u32_v:
13096 case NEON::BI__builtin_neon_vcvtq_s64_v:
13097 case NEON::BI__builtin_neon_vcvtq_u64_v:
13098 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13099 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13100 Int =
13101 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13102 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13103 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13104 }
13105 case NEON::BI__builtin_neon_vcvta_s16_f16:
13106 case NEON::BI__builtin_neon_vcvta_u16_f16:
13107 case NEON::BI__builtin_neon_vcvta_s32_v:
13108 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13109 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13110 case NEON::BI__builtin_neon_vcvta_u32_v:
13111 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13112 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13113 case NEON::BI__builtin_neon_vcvta_s64_v:
13114 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13115 case NEON::BI__builtin_neon_vcvta_u64_v:
13116 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13117 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13118 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13119 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13120 }
13121 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13122 case NEON::BI__builtin_neon_vcvtm_s32_v:
13123 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13124 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13125 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13126 case NEON::BI__builtin_neon_vcvtm_u32_v:
13127 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13128 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13129 case NEON::BI__builtin_neon_vcvtm_s64_v:
13130 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13131 case NEON::BI__builtin_neon_vcvtm_u64_v:
13132 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13133 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13134 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13135 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13136 }
13137 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13138 case NEON::BI__builtin_neon_vcvtn_s32_v:
13139 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13140 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13141 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13142 case NEON::BI__builtin_neon_vcvtn_u32_v:
13143 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13144 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13145 case NEON::BI__builtin_neon_vcvtn_s64_v:
13146 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13147 case NEON::BI__builtin_neon_vcvtn_u64_v:
13148 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13149 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13150 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13151 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13152 }
13153 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13154 case NEON::BI__builtin_neon_vcvtp_s32_v:
13155 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13156 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13157 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13158 case NEON::BI__builtin_neon_vcvtp_u32_v:
13159 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13160 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13161 case NEON::BI__builtin_neon_vcvtp_s64_v:
13162 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13163 case NEON::BI__builtin_neon_vcvtp_u64_v:
13164 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13165 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13166 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13167 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13168 }
13169 case NEON::BI__builtin_neon_vmulx_v:
13170 case NEON::BI__builtin_neon_vmulxq_v: {
13171 Int = Intrinsic::aarch64_neon_fmulx;
13172 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13173 }
13174 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13175 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13176 // vmulx_lane should be mapped to Neon scalar mulx after
13177 // extracting the scalar element
13178 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13179 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13180 Ops.pop_back();
13181 Int = Intrinsic::aarch64_neon_fmulx;
13182 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13183 }
13184 case NEON::BI__builtin_neon_vmul_lane_v:
13185 case NEON::BI__builtin_neon_vmul_laneq_v: {
13186 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13187 bool Quad = false;
13188 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13189 Quad = true;
13190 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13191 llvm::FixedVectorType *VTy =
13192 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13193 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13194 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13195 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13196 return Builder.CreateBitCast(Result, Ty);
13197 }
13198 case NEON::BI__builtin_neon_vnegd_s64:
13199 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13200 case NEON::BI__builtin_neon_vnegh_f16:
13201 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13202 case NEON::BI__builtin_neon_vpmaxnm_v:
13203 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13204 Int = Intrinsic::aarch64_neon_fmaxnmp;
13205 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13206 }
13207 case NEON::BI__builtin_neon_vpminnm_v:
13208 case NEON::BI__builtin_neon_vpminnmq_v: {
13209 Int = Intrinsic::aarch64_neon_fminnmp;
13210 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13211 }
13212 case NEON::BI__builtin_neon_vsqrth_f16: {
13213 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13214 Int = Builder.getIsFPConstrained()
13215 ? Intrinsic::experimental_constrained_sqrt
13216 : Intrinsic::sqrt;
13217 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13218 }
13219 case NEON::BI__builtin_neon_vsqrt_v:
13220 case NEON::BI__builtin_neon_vsqrtq_v: {
13221 Int = Builder.getIsFPConstrained()
13222 ? Intrinsic::experimental_constrained_sqrt
13223 : Intrinsic::sqrt;
13224 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13225 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13226 }
13227 case NEON::BI__builtin_neon_vrbit_v:
13228 case NEON::BI__builtin_neon_vrbitq_v: {
13229 Int = Intrinsic::bitreverse;
13230 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13231 }
13232 case NEON::BI__builtin_neon_vaddv_u8:
13233 // FIXME: These are handled by the AArch64 scalar code.
13234 usgn = true;
13235 [[fallthrough]];
13236 case NEON::BI__builtin_neon_vaddv_s8: {
13237 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13238 Ty = Int32Ty;
13239 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13240 llvm::Type *Tys[2] = { Ty, VTy };
13241 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13242 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13243 return Builder.CreateTrunc(Ops[0], Int8Ty);
13244 }
13245 case NEON::BI__builtin_neon_vaddv_u16:
13246 usgn = true;
13247 [[fallthrough]];
13248 case NEON::BI__builtin_neon_vaddv_s16: {
13249 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13250 Ty = Int32Ty;
13251 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13252 llvm::Type *Tys[2] = { Ty, VTy };
13253 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13254 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13255 return Builder.CreateTrunc(Ops[0], Int16Ty);
13256 }
13257 case NEON::BI__builtin_neon_vaddvq_u8:
13258 usgn = true;
13259 [[fallthrough]];
13260 case NEON::BI__builtin_neon_vaddvq_s8: {
13261 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13262 Ty = Int32Ty;
13263 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13264 llvm::Type *Tys[2] = { Ty, VTy };
13265 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13266 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13267 return Builder.CreateTrunc(Ops[0], Int8Ty);
13268 }
13269 case NEON::BI__builtin_neon_vaddvq_u16:
13270 usgn = true;
13271 [[fallthrough]];
13272 case NEON::BI__builtin_neon_vaddvq_s16: {
13273 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13274 Ty = Int32Ty;
13275 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13276 llvm::Type *Tys[2] = { Ty, VTy };
13277 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13278 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13279 return Builder.CreateTrunc(Ops[0], Int16Ty);
13280 }
13281 case NEON::BI__builtin_neon_vmaxv_u8: {
13282 Int = Intrinsic::aarch64_neon_umaxv;
13283 Ty = Int32Ty;
13284 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13285 llvm::Type *Tys[2] = { Ty, VTy };
13286 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13287 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13288 return Builder.CreateTrunc(Ops[0], Int8Ty);
13289 }
13290 case NEON::BI__builtin_neon_vmaxv_u16: {
13291 Int = Intrinsic::aarch64_neon_umaxv;
13292 Ty = Int32Ty;
13293 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13294 llvm::Type *Tys[2] = { Ty, VTy };
13295 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13296 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13297 return Builder.CreateTrunc(Ops[0], Int16Ty);
13298 }
13299 case NEON::BI__builtin_neon_vmaxvq_u8: {
13300 Int = Intrinsic::aarch64_neon_umaxv;
13301 Ty = Int32Ty;
13302 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13303 llvm::Type *Tys[2] = { Ty, VTy };
13304 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13305 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13306 return Builder.CreateTrunc(Ops[0], Int8Ty);
13307 }
13308 case NEON::BI__builtin_neon_vmaxvq_u16: {
13309 Int = Intrinsic::aarch64_neon_umaxv;
13310 Ty = Int32Ty;
13311 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13312 llvm::Type *Tys[2] = { Ty, VTy };
13313 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13314 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13315 return Builder.CreateTrunc(Ops[0], Int16Ty);
13316 }
13317 case NEON::BI__builtin_neon_vmaxv_s8: {
13318 Int = Intrinsic::aarch64_neon_smaxv;
13319 Ty = Int32Ty;
13320 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13321 llvm::Type *Tys[2] = { Ty, VTy };
13322 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13323 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13324 return Builder.CreateTrunc(Ops[0], Int8Ty);
13325 }
13326 case NEON::BI__builtin_neon_vmaxv_s16: {
13327 Int = Intrinsic::aarch64_neon_smaxv;
13328 Ty = Int32Ty;
13329 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13330 llvm::Type *Tys[2] = { Ty, VTy };
13331 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13332 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13333 return Builder.CreateTrunc(Ops[0], Int16Ty);
13334 }
13335 case NEON::BI__builtin_neon_vmaxvq_s8: {
13336 Int = Intrinsic::aarch64_neon_smaxv;
13337 Ty = Int32Ty;
13338 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13339 llvm::Type *Tys[2] = { Ty, VTy };
13340 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13341 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13342 return Builder.CreateTrunc(Ops[0], Int8Ty);
13343 }
13344 case NEON::BI__builtin_neon_vmaxvq_s16: {
13345 Int = Intrinsic::aarch64_neon_smaxv;
13346 Ty = Int32Ty;
13347 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13348 llvm::Type *Tys[2] = { Ty, VTy };
13349 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13350 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13351 return Builder.CreateTrunc(Ops[0], Int16Ty);
13352 }
13353 case NEON::BI__builtin_neon_vmaxv_f16: {
13354 Int = Intrinsic::aarch64_neon_fmaxv;
13355 Ty = HalfTy;
13356 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13357 llvm::Type *Tys[2] = { Ty, VTy };
13358 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13360 return Builder.CreateTrunc(Ops[0], HalfTy);
13361 }
13362 case NEON::BI__builtin_neon_vmaxvq_f16: {
13363 Int = Intrinsic::aarch64_neon_fmaxv;
13364 Ty = HalfTy;
13365 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13366 llvm::Type *Tys[2] = { Ty, VTy };
13367 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13368 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13369 return Builder.CreateTrunc(Ops[0], HalfTy);
13370 }
13371 case NEON::BI__builtin_neon_vminv_u8: {
13372 Int = Intrinsic::aarch64_neon_uminv;
13373 Ty = Int32Ty;
13374 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13375 llvm::Type *Tys[2] = { Ty, VTy };
13376 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13377 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13378 return Builder.CreateTrunc(Ops[0], Int8Ty);
13379 }
13380 case NEON::BI__builtin_neon_vminv_u16: {
13381 Int = Intrinsic::aarch64_neon_uminv;
13382 Ty = Int32Ty;
13383 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13384 llvm::Type *Tys[2] = { Ty, VTy };
13385 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13386 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13387 return Builder.CreateTrunc(Ops[0], Int16Ty);
13388 }
13389 case NEON::BI__builtin_neon_vminvq_u8: {
13390 Int = Intrinsic::aarch64_neon_uminv;
13391 Ty = Int32Ty;
13392 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13393 llvm::Type *Tys[2] = { Ty, VTy };
13394 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13395 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13396 return Builder.CreateTrunc(Ops[0], Int8Ty);
13397 }
13398 case NEON::BI__builtin_neon_vminvq_u16: {
13399 Int = Intrinsic::aarch64_neon_uminv;
13400 Ty = Int32Ty;
13401 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13402 llvm::Type *Tys[2] = { Ty, VTy };
13403 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13404 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13405 return Builder.CreateTrunc(Ops[0], Int16Ty);
13406 }
13407 case NEON::BI__builtin_neon_vminv_s8: {
13408 Int = Intrinsic::aarch64_neon_sminv;
13409 Ty = Int32Ty;
13410 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13411 llvm::Type *Tys[2] = { Ty, VTy };
13412 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13413 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13414 return Builder.CreateTrunc(Ops[0], Int8Ty);
13415 }
13416 case NEON::BI__builtin_neon_vminv_s16: {
13417 Int = Intrinsic::aarch64_neon_sminv;
13418 Ty = Int32Ty;
13419 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13420 llvm::Type *Tys[2] = { Ty, VTy };
13421 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13422 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13423 return Builder.CreateTrunc(Ops[0], Int16Ty);
13424 }
13425 case NEON::BI__builtin_neon_vminvq_s8: {
13426 Int = Intrinsic::aarch64_neon_sminv;
13427 Ty = Int32Ty;
13428 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13429 llvm::Type *Tys[2] = { Ty, VTy };
13430 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13431 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13432 return Builder.CreateTrunc(Ops[0], Int8Ty);
13433 }
13434 case NEON::BI__builtin_neon_vminvq_s16: {
13435 Int = Intrinsic::aarch64_neon_sminv;
13436 Ty = Int32Ty;
13437 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13438 llvm::Type *Tys[2] = { Ty, VTy };
13439 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13440 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13441 return Builder.CreateTrunc(Ops[0], Int16Ty);
13442 }
13443 case NEON::BI__builtin_neon_vminv_f16: {
13444 Int = Intrinsic::aarch64_neon_fminv;
13445 Ty = HalfTy;
13446 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13447 llvm::Type *Tys[2] = { Ty, VTy };
13448 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13449 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13450 return Builder.CreateTrunc(Ops[0], HalfTy);
13451 }
13452 case NEON::BI__builtin_neon_vminvq_f16: {
13453 Int = Intrinsic::aarch64_neon_fminv;
13454 Ty = HalfTy;
13455 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13456 llvm::Type *Tys[2] = { Ty, VTy };
13457 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13458 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13459 return Builder.CreateTrunc(Ops[0], HalfTy);
13460 }
13461 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13462 Int = Intrinsic::aarch64_neon_fmaxnmv;
13463 Ty = HalfTy;
13464 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13465 llvm::Type *Tys[2] = { Ty, VTy };
13466 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13467 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13468 return Builder.CreateTrunc(Ops[0], HalfTy);
13469 }
13470 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13471 Int = Intrinsic::aarch64_neon_fmaxnmv;
13472 Ty = HalfTy;
13473 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13474 llvm::Type *Tys[2] = { Ty, VTy };
13475 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13476 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13477 return Builder.CreateTrunc(Ops[0], HalfTy);
13478 }
13479 case NEON::BI__builtin_neon_vminnmv_f16: {
13480 Int = Intrinsic::aarch64_neon_fminnmv;
13481 Ty = HalfTy;
13482 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13483 llvm::Type *Tys[2] = { Ty, VTy };
13484 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13485 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13486 return Builder.CreateTrunc(Ops[0], HalfTy);
13487 }
13488 case NEON::BI__builtin_neon_vminnmvq_f16: {
13489 Int = Intrinsic::aarch64_neon_fminnmv;
13490 Ty = HalfTy;
13491 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13492 llvm::Type *Tys[2] = { Ty, VTy };
13493 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13494 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13495 return Builder.CreateTrunc(Ops[0], HalfTy);
13496 }
13497 case NEON::BI__builtin_neon_vmul_n_f64: {
13498 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13499 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13500 return Builder.CreateFMul(Ops[0], RHS);
13501 }
13502 case NEON::BI__builtin_neon_vaddlv_u8: {
13503 Int = Intrinsic::aarch64_neon_uaddlv;
13504 Ty = Int32Ty;
13505 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13506 llvm::Type *Tys[2] = { Ty, VTy };
13507 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13508 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13509 return Builder.CreateTrunc(Ops[0], Int16Ty);
13510 }
13511 case NEON::BI__builtin_neon_vaddlv_u16: {
13512 Int = Intrinsic::aarch64_neon_uaddlv;
13513 Ty = Int32Ty;
13514 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13515 llvm::Type *Tys[2] = { Ty, VTy };
13516 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13517 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13518 }
13519 case NEON::BI__builtin_neon_vaddlvq_u8: {
13520 Int = Intrinsic::aarch64_neon_uaddlv;
13521 Ty = Int32Ty;
13522 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13523 llvm::Type *Tys[2] = { Ty, VTy };
13524 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13525 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13526 return Builder.CreateTrunc(Ops[0], Int16Ty);
13527 }
13528 case NEON::BI__builtin_neon_vaddlvq_u16: {
13529 Int = Intrinsic::aarch64_neon_uaddlv;
13530 Ty = Int32Ty;
13531 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13532 llvm::Type *Tys[2] = { Ty, VTy };
13533 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13534 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13535 }
13536 case NEON::BI__builtin_neon_vaddlv_s8: {
13537 Int = Intrinsic::aarch64_neon_saddlv;
13538 Ty = Int32Ty;
13539 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13540 llvm::Type *Tys[2] = { Ty, VTy };
13541 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13542 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13543 return Builder.CreateTrunc(Ops[0], Int16Ty);
13544 }
13545 case NEON::BI__builtin_neon_vaddlv_s16: {
13546 Int = Intrinsic::aarch64_neon_saddlv;
13547 Ty = Int32Ty;
13548 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13549 llvm::Type *Tys[2] = { Ty, VTy };
13550 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13551 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13552 }
13553 case NEON::BI__builtin_neon_vaddlvq_s8: {
13554 Int = Intrinsic::aarch64_neon_saddlv;
13555 Ty = Int32Ty;
13556 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13557 llvm::Type *Tys[2] = { Ty, VTy };
13558 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13559 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13560 return Builder.CreateTrunc(Ops[0], Int16Ty);
13561 }
13562 case NEON::BI__builtin_neon_vaddlvq_s16: {
13563 Int = Intrinsic::aarch64_neon_saddlv;
13564 Ty = Int32Ty;
13565 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13566 llvm::Type *Tys[2] = { Ty, VTy };
13567 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13568 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13569 }
13570 case NEON::BI__builtin_neon_vsri_n_v:
13571 case NEON::BI__builtin_neon_vsriq_n_v: {
13572 Int = Intrinsic::aarch64_neon_vsri;
13573 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13574 return EmitNeonCall(Intrin, Ops, "vsri_n");
13575 }
13576 case NEON::BI__builtin_neon_vsli_n_v:
13577 case NEON::BI__builtin_neon_vsliq_n_v: {
13578 Int = Intrinsic::aarch64_neon_vsli;
13579 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13580 return EmitNeonCall(Intrin, Ops, "vsli_n");
13581 }
13582 case NEON::BI__builtin_neon_vsra_n_v:
13583 case NEON::BI__builtin_neon_vsraq_n_v:
13584 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13585 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13586 return Builder.CreateAdd(Ops[0], Ops[1]);
13587 case NEON::BI__builtin_neon_vrsra_n_v:
13588 case NEON::BI__builtin_neon_vrsraq_n_v: {
13589 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13590 SmallVector<llvm::Value*,2> TmpOps;
13591 TmpOps.push_back(Ops[1]);
13592 TmpOps.push_back(Ops[2]);
13593 Function* F = CGM.getIntrinsic(Int, Ty);
13594 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13595 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13596 return Builder.CreateAdd(Ops[0], tmp);
13597 }
13598 case NEON::BI__builtin_neon_vld1_v:
13599 case NEON::BI__builtin_neon_vld1q_v: {
13600 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13601 }
13602 case NEON::BI__builtin_neon_vst1_v:
13603 case NEON::BI__builtin_neon_vst1q_v:
13604 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13605 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13606 case NEON::BI__builtin_neon_vld1_lane_v:
13607 case NEON::BI__builtin_neon_vld1q_lane_v: {
13608 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13609 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13610 PtrOp0.getAlignment());
13611 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13612 }
13613 case NEON::BI__builtin_neon_vldap1_lane_s64:
13614 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13615 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13616 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13617 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13618 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13619 Ops[0] = LI;
13620 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13621 }
13622 case NEON::BI__builtin_neon_vld1_dup_v:
13623 case NEON::BI__builtin_neon_vld1q_dup_v: {
13624 Value *V = PoisonValue::get(Ty);
13625 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13626 PtrOp0.getAlignment());
13627 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13628 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13629 return EmitNeonSplat(Ops[0], CI);
13630 }
13631 case NEON::BI__builtin_neon_vst1_lane_v:
13632 case NEON::BI__builtin_neon_vst1q_lane_v:
13633 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13634 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13635 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13636 case NEON::BI__builtin_neon_vstl1_lane_s64:
13637 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13638 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13639 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13640 llvm::StoreInst *SI =
13641 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13642 SI->setAtomic(llvm::AtomicOrdering::Release);
13643 return SI;
13644 }
13645 case NEON::BI__builtin_neon_vld2_v:
13646 case NEON::BI__builtin_neon_vld2q_v: {
13647 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13648 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13649 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13650 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13651 }
13652 case NEON::BI__builtin_neon_vld3_v:
13653 case NEON::BI__builtin_neon_vld3q_v: {
13654 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13655 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13656 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13657 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13658 }
13659 case NEON::BI__builtin_neon_vld4_v:
13660 case NEON::BI__builtin_neon_vld4q_v: {
13661 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13662 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13663 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13664 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13665 }
13666 case NEON::BI__builtin_neon_vld2_dup_v:
13667 case NEON::BI__builtin_neon_vld2q_dup_v: {
13668 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13669 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13670 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13671 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13672 }
13673 case NEON::BI__builtin_neon_vld3_dup_v:
13674 case NEON::BI__builtin_neon_vld3q_dup_v: {
13675 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13676 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13677 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13678 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13679 }
13680 case NEON::BI__builtin_neon_vld4_dup_v:
13681 case NEON::BI__builtin_neon_vld4q_dup_v: {
13682 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13683 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13684 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13685 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13686 }
13687 case NEON::BI__builtin_neon_vld2_lane_v:
13688 case NEON::BI__builtin_neon_vld2q_lane_v: {
13689 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13690 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13691 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13692 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13693 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13694 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13695 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13696 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13697 }
13698 case NEON::BI__builtin_neon_vld3_lane_v:
13699 case NEON::BI__builtin_neon_vld3q_lane_v: {
13700 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13701 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13702 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13704 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13705 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13706 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13707 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13708 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13709 }
13710 case NEON::BI__builtin_neon_vld4_lane_v:
13711 case NEON::BI__builtin_neon_vld4q_lane_v: {
13712 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13713 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13714 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13715 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13716 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13717 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13718 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13719 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13720 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13721 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13722 }
13723 case NEON::BI__builtin_neon_vst2_v:
13724 case NEON::BI__builtin_neon_vst2q_v: {
13725 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13726 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13727 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13728 Ops, "");
13729 }
13730 case NEON::BI__builtin_neon_vst2_lane_v:
13731 case NEON::BI__builtin_neon_vst2q_lane_v: {
13732 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13733 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13734 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13735 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13736 Ops, "");
13737 }
13738 case NEON::BI__builtin_neon_vst3_v:
13739 case NEON::BI__builtin_neon_vst3q_v: {
13740 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13741 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13742 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13743 Ops, "");
13744 }
13745 case NEON::BI__builtin_neon_vst3_lane_v:
13746 case NEON::BI__builtin_neon_vst3q_lane_v: {
13747 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13748 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13749 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13750 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13751 Ops, "");
13752 }
13753 case NEON::BI__builtin_neon_vst4_v:
13754 case NEON::BI__builtin_neon_vst4q_v: {
13755 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13756 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13757 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13758 Ops, "");
13759 }
13760 case NEON::BI__builtin_neon_vst4_lane_v:
13761 case NEON::BI__builtin_neon_vst4q_lane_v: {
13762 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13763 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13764 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13765 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13766 Ops, "");
13767 }
13768 case NEON::BI__builtin_neon_vtrn_v:
13769 case NEON::BI__builtin_neon_vtrnq_v: {
13770 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13771 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13772 Value *SV = nullptr;
13773
13774 for (unsigned vi = 0; vi != 2; ++vi) {
13775 SmallVector<int, 16> Indices;
13776 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13777 Indices.push_back(i+vi);
13778 Indices.push_back(i+e+vi);
13779 }
13780 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13781 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13782 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13783 }
13784 return SV;
13785 }
13786 case NEON::BI__builtin_neon_vuzp_v:
13787 case NEON::BI__builtin_neon_vuzpq_v: {
13788 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13789 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13790 Value *SV = nullptr;
13791
13792 for (unsigned vi = 0; vi != 2; ++vi) {
13793 SmallVector<int, 16> Indices;
13794 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13795 Indices.push_back(2*i+vi);
13796
13797 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13798 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13799 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13800 }
13801 return SV;
13802 }
13803 case NEON::BI__builtin_neon_vzip_v:
13804 case NEON::BI__builtin_neon_vzipq_v: {
13805 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13806 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13807 Value *SV = nullptr;
13808
13809 for (unsigned vi = 0; vi != 2; ++vi) {
13810 SmallVector<int, 16> Indices;
13811 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13812 Indices.push_back((i + vi*e) >> 1);
13813 Indices.push_back(((i + vi*e) >> 1)+e);
13814 }
13815 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13816 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13817 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13818 }
13819 return SV;
13820 }
13821 case NEON::BI__builtin_neon_vqtbl1q_v: {
13822 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13823 Ops, "vtbl1");
13824 }
13825 case NEON::BI__builtin_neon_vqtbl2q_v: {
13826 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13827 Ops, "vtbl2");
13828 }
13829 case NEON::BI__builtin_neon_vqtbl3q_v: {
13830 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13831 Ops, "vtbl3");
13832 }
13833 case NEON::BI__builtin_neon_vqtbl4q_v: {
13834 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13835 Ops, "vtbl4");
13836 }
13837 case NEON::BI__builtin_neon_vqtbx1q_v: {
13838 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13839 Ops, "vtbx1");
13840 }
13841 case NEON::BI__builtin_neon_vqtbx2q_v: {
13842 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13843 Ops, "vtbx2");
13844 }
13845 case NEON::BI__builtin_neon_vqtbx3q_v: {
13846 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13847 Ops, "vtbx3");
13848 }
13849 case NEON::BI__builtin_neon_vqtbx4q_v: {
13850 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13851 Ops, "vtbx4");
13852 }
13853 case NEON::BI__builtin_neon_vsqadd_v:
13854 case NEON::BI__builtin_neon_vsqaddq_v: {
13855 Int = Intrinsic::aarch64_neon_usqadd;
13856 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13857 }
13858 case NEON::BI__builtin_neon_vuqadd_v:
13859 case NEON::BI__builtin_neon_vuqaddq_v: {
13860 Int = Intrinsic::aarch64_neon_suqadd;
13861 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13862 }
13863
13864 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13865 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13866 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13867 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13868 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13869 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13870 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13871 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13872 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13873 llvm::Type *Tys[2];
13874 Tys[0] = Ty;
13875 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13876 /*isQuad*/ false));
13877 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13878 }
13879 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13880 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13881 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13882 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13883 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13884 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13885 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13886 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13887 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13888 llvm::Type *Tys[2];
13889 Tys[0] = Ty;
13890 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13891 /*isQuad*/ true));
13892 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13893 }
13894 case NEON::BI__builtin_neon_vluti2_lane_bf16:
13895 case NEON::BI__builtin_neon_vluti2_lane_f16:
13896 case NEON::BI__builtin_neon_vluti2_lane_p16:
13897 case NEON::BI__builtin_neon_vluti2_lane_p8:
13898 case NEON::BI__builtin_neon_vluti2_lane_s16:
13899 case NEON::BI__builtin_neon_vluti2_lane_s8:
13900 case NEON::BI__builtin_neon_vluti2_lane_u16:
13901 case NEON::BI__builtin_neon_vluti2_lane_u8: {
13902 Int = Intrinsic::aarch64_neon_vluti2_lane;
13903 llvm::Type *Tys[2];
13904 Tys[0] = Ty;
13905 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13906 /*isQuad*/ false));
13907 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13908 }
13909 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13910 case NEON::BI__builtin_neon_vluti2q_lane_f16:
13911 case NEON::BI__builtin_neon_vluti2q_lane_p16:
13912 case NEON::BI__builtin_neon_vluti2q_lane_p8:
13913 case NEON::BI__builtin_neon_vluti2q_lane_s16:
13914 case NEON::BI__builtin_neon_vluti2q_lane_s8:
13915 case NEON::BI__builtin_neon_vluti2q_lane_u16:
13916 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13917 Int = Intrinsic::aarch64_neon_vluti2_lane;
13918 llvm::Type *Tys[2];
13919 Tys[0] = Ty;
13920 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13921 /*isQuad*/ true));
13922 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13923 }
13924 case NEON::BI__builtin_neon_vluti4q_lane_p8:
13925 case NEON::BI__builtin_neon_vluti4q_lane_s8:
13926 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13927 Int = Intrinsic::aarch64_neon_vluti4q_lane;
13928 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
13929 }
13930 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13931 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13932 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13933 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13934 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
13935 }
13936 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13937 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
13938 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
13939 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
13940 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
13941 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
13942 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
13943 }
13944 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
13945 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
13946 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
13947 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
13948 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
13949 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
13950 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
13951 }
13952
13953 case NEON::BI__builtin_neon_vamin_f16:
13954 case NEON::BI__builtin_neon_vaminq_f16:
13955 case NEON::BI__builtin_neon_vamin_f32:
13956 case NEON::BI__builtin_neon_vaminq_f32:
13957 case NEON::BI__builtin_neon_vaminq_f64: {
13958 Int = Intrinsic::aarch64_neon_famin;
13959 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
13960 }
13961 case NEON::BI__builtin_neon_vamax_f16:
13962 case NEON::BI__builtin_neon_vamaxq_f16:
13963 case NEON::BI__builtin_neon_vamax_f32:
13964 case NEON::BI__builtin_neon_vamaxq_f32:
13965 case NEON::BI__builtin_neon_vamaxq_f64: {
13966 Int = Intrinsic::aarch64_neon_famax;
13967 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
13968 }
13969 case NEON::BI__builtin_neon_vscale_f16:
13970 case NEON::BI__builtin_neon_vscaleq_f16:
13971 case NEON::BI__builtin_neon_vscale_f32:
13972 case NEON::BI__builtin_neon_vscaleq_f32:
13973 case NEON::BI__builtin_neon_vscaleq_f64: {
13974 Int = Intrinsic::aarch64_neon_fp8_fscale;
13975 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
13976 }
13977 }
13978}
13979
// Emits IR for the four BPF builtins accepted by the assert below
// (__builtin_preserve_field_info, __builtin_btf_type_id,
// __builtin_preserve_type_info, __builtin_preserve_enum_value).
//
// Every case reports an error through CGM.Error() when getDebugInfo() returns
// null. In that situation the field-info case still returns the raw field
// pointer, while the other cases return nullptr. Otherwise the result is the
// emitted call to the matching Intrinsic::bpf_* intrinsic.
13980Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13981 const CallExpr *E) {
13982 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13983 BuiltinID == BPF::BI__builtin_btf_type_id ||
13984 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13985 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13986 "unexpected BPF builtin");
13987
13988 // A sequence number, injected into IR builtin functions, to
13989 // prevent CSE given the only difference of the function
13990 // may just be the debuginfo metadata.
 // Function-local static: the counter keeps its value across calls.
13991 static uint32_t BuiltinSeqNum;
13992
13993 switch (BuiltinID) {
13994 default:
13995 llvm_unreachable("Unexpected BPF builtin");
13996 case BPF::BI__builtin_preserve_field_info: {
13997 const Expr *Arg = E->getArg(0);
13998 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13999
 // Without debug info: diagnose, but still emit and return the field address.
14000 if (!getDebugInfo()) {
14001 CGM.Error(E->getExprLoc(),
14002 "using __builtin_preserve_field_info() without -g");
14003 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14004 : EmitLValue(Arg).emitRawPointer(*this);
14005 }
14006
14007 // Enable underlying preserve_*_access_index() generation.
 // The flag is set only while the field lvalue is emitted and then restored
 // to its previous value.
14008 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14009 IsInPreservedAIRegion = true;
14010 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14011 : EmitLValue(Arg).emitRawPointer(*this);
14012 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14013
 // cast<> requires the second argument to emit as a ConstantInt; its
 // sign-extended value is re-created as an Int64Ty constant.
14014 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14015 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14016
14017 // Build the IR for the preserve_field_info intrinsic.
 // The intrinsic is overloaded on the type of the field address.
14018 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14019 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14020 {FieldAddr->getType()});
14021 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14022 }
14023 case BPF::BI__builtin_btf_type_id:
14024 case BPF::BI__builtin_preserve_type_info: {
14025 if (!getDebugInfo()) {
14026 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14027 return nullptr;
14028 }
14029
 // Only the type of the first argument is used: it is turned into a
 // standalone debug-info type.
14030 const Expr *Arg0 = E->getArg(0);
14031 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14032 Arg0->getType(), Arg0->getExprLoc());
14033
14034 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14035 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
 // Each emitted call consumes a fresh sequence number.
14036 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14037
 // Both builtins share the operand list; only the intrinsic differs.
14038 llvm::Function *FnDecl;
14039 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14040 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14041 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14042 else
14043 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14044 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14045 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
 // The debug-info type travels with the call as preserve_access_index
 // metadata.
14046 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14047 return Fn;
14048 }
14049 case BPF::BI__builtin_preserve_enum_value: {
14050 if (!getDebugInfo()) {
14051 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14052 return nullptr;
14053 }
14054
14055 const Expr *Arg0 = E->getArg(0);
14056 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14057 Arg0->getType(), Arg0->getExprLoc());
14058
14059 // Find enumerator
 // The cast<> chain requires the argument (ignoring parentheses) to be a
 // unary operator applied to a C-style cast of a DeclRefExpr that names an
 // enumerator.
14060 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14061 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14062 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14063 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14064
 // Negative values and values above INT64_MAX are printed through
 // getSExtValue(); all others through getZExtValue().
14065 auto InitVal = Enumerator->getInitVal();
14066 std::string InitValStr;
14067 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14068 InitValStr = std::to_string(InitVal.getSExtValue());
14069 else
14070 InitValStr = std::to_string(InitVal.getZExtValue());
 // The string operand has the form "<enumerator name>:<value>".
14071 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14072 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14073
14074 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14075 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14076 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14077
14078 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14079 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14080 CallInst *Fn =
14081 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14082 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14083 return Fn;
14084 }
14085 }
14086}
14087
// Builds a vector value out of the scalar elements in Ops.
//
// When every element is a Constant the result is a ConstantVector; otherwise
// the elements are inserted one by one into a poison vector whose element type
// is taken from Ops[0] and whose length is Ops.size().
// NOTE(review): this listing dropped the line carrying the member name and
// parameter list, and the declaration of CstOps used below - confirm both
// against the real source.
14088llvm::Value *CodeGenFunction::
14090 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14091 "Not a power-of-two sized vector!");
14092 bool AllConstants = true;
 // The scan stops at the first element that is not a Constant.
14093 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14094 AllConstants &= isa<Constant>(Ops[i]);
14095
14096 // If this is a constant vector, create a ConstantVector.
14097 if (AllConstants) {
14099 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14100 CstOps.push_back(cast<Constant>(Ops[i]));
14101 return llvm::ConstantVector::get(CstOps);
14102 }
14103
14104 // Otherwise, insertelement the values to build the vector.
14105 Value *Result = llvm::PoisonValue::get(
14106 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14107
 // Lane indices are passed as 64-bit integer constants.
14108 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14109 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14110
14111 return Result;
14112}
14113
14114// Convert the mask from an integer type to a vector of i1.
// The mask is bitcast to one i1 lane per bit of its integer type. When NumElts
// is below 8, the first NumElts lanes are extracted with a shuffle; otherwise
// the bitcast vector is returned unchanged.
14116 unsigned NumElts) {
14117
14118 auto *MaskTy = llvm::FixedVectorType::get(
14119 CGF.Builder.getInt1Ty(),
14120 cast<IntegerType>(Mask->getType())->getBitWidth());
14121 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14122
14123 // If we have less than 8 elements, then the starting mask was an i8 and
14124 // we need to extract down to the right number of elements.
14125 if (NumElts < 8) {
 // NOTE(review): Indices has 4 slots while the guard above admits NumElts
 // up to 7; presumably callers only pass 1, 2 or 4 here - confirm, or size
 // the array to match the guard.
14126 int Indices[4];
14127 for (unsigned i = 0; i != NumElts; ++i)
14128 Indices[i] = i;
14129 MaskVec = CGF.Builder.CreateShuffleVector(
14130 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14131 }
14132 return MaskVec;
14133}
14134
// Emits a masked store: the vector in Ops[1] is stored through the pointer in
// Ops[0] with the given alignment, under the integer mask in Ops[2]. The mask
// is converted to an i1 vector with one lane per element of Ops[1].
// NOTE(review): the first line of this signature is missing from the listing.
14136 Align Alignment) {
14137 Value *Ptr = Ops[0];
14138
14139 Value *MaskVec = getMaskVecValue(
14140 CGF, Ops[2],
14141 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14142
14143 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14144}
14145
// Emits a masked load through the pointer in Ops[0] with the given alignment,
// under the integer mask in Ops[2]. Ops[1] supplies both the loaded type and
// the last operand handed to CreateMaskedLoad.
// NOTE(review): the first line of this signature is missing from the listing.
14147 Align Alignment) {
14148 llvm::Type *Ty = Ops[1]->getType();
14149 Value *Ptr = Ops[0];
14150
14151 Value *MaskVec = getMaskVecValue(
14152 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14153
14154 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14155}
14156
// Emits a call to the masked_expandload intrinsic, overloaded on the type of
// Ops[1]. Operands passed, in order: pointer Ops[0], the i1 mask vector derived
// from Ops[2], and Ops[1].
// NOTE(review): the first line of this signature is missing from the listing.
14158 ArrayRef<Value *> Ops) {
14159 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14160 Value *Ptr = Ops[0];
14161
 // The cast<> below requires the result type to be a fixed-width vector.
14162 Value *MaskVec = getMaskVecValue(
14163 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14164
14165 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14166 ResultTy);
14167 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14168}
14169
// Emits a call to x86_avx512_mask_compress (IsCompress) or
// x86_avx512_mask_expand (otherwise), overloaded on the type of Ops[1].
// Operands passed, in order: Ops[0], Ops[1], and the i1 mask vector derived
// from Ops[2].
// NOTE(review): the leading lines of this signature are missing from the
// listing.
14172 bool IsCompress) {
14173 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14174
14175 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14176
14177 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14178 : Intrinsic::x86_avx512_mask_expand;
14179 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14180 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14181}
14182
// Emits a call to the masked_compressstore intrinsic, overloaded on the type
// of Ops[1]. Operands passed, in order: the vector Ops[1], the pointer Ops[0],
// and the i1 mask vector derived from Ops[2].
// NOTE(review): the first line of this signature is missing from the listing.
14184 ArrayRef<Value *> Ops) {
14185 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14186 Value *Ptr = Ops[0];
14187
14188 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14189
14190 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14191 ResultTy);
14192 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14193}
14194
// Applies the binary operator Opc to two integer masks, Ops[0] and Ops[1].
// Both masks are converted to i1 vectors (one lane per bit of Ops[0]'s integer
// type), the left operand is optionally inverted, and the result is bitcast
// back to the type of Ops[0].
// NOTE(review): one parameter line of this signature is missing from the
// listing.
14195static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14197 bool InvertLHS = false) {
14198 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14199 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14200 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14201
14202 if (InvertLHS)
14203 LHS = CGF.Builder.CreateNot(LHS);
14204
14205 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14206 Ops[0]->getType());
14207}
14208
// Emits a call to the fshr (IsRight) or fshl (otherwise) intrinsic, overloaded
// on the type of Op0, with operands Op0, Op1 and Amt. An amount whose type
// differs from Op0's is first converted and splatted, as described below.
// NOTE(review): the first line of this signature is missing from the listing.
14210 Value *Amt, bool IsRight) {
14211 llvm::Type *Ty = Op0->getType();
14212
14213 // Amount may be scalar immediate, in which case create a splat vector.
14214 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
14215 // we only care about the lowest log2 bits anyway.
14216 if (Amt->getType() != Ty) {
14217 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
 // Unsigned integer cast (isSigned == false) to the element type.
14218 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14219 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14220 }
14221
14222 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14223 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14224 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14225}
14226
// Lowers an XOP vpcom/vpcomu comparison of Ops[0] against Ops[1].
//
// The low three bits of the constant in Ops[2] select the predicate:
// 0 <, 1 <=, 2 >, 3 >=, 4 ==, 5 !=, 6 always false, 7 always true.
// IsSigned chooses signed or unsigned predicates for the four ordered cases.
// Result: the compare sign-extended to the operand type, or an all-zeros /
// all-ones constant of that type for cases 6 and 7.
// NOTE(review): the first line of this signature is missing from the listing.
14228 bool IsSigned) {
14229 Value *Op0 = Ops[0];
14230 Value *Op1 = Ops[1];
14231 llvm::Type *Ty = Op0->getType();
 // cast<> requires Ops[2] to be a ConstantInt; only bits [2:0] are used.
14232 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14233
14234 CmpInst::Predicate Pred;
14235 switch (Imm) {
14236 case 0x0:
14237 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14238 break;
14239 case 0x1:
14240 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14241 break;
14242 case 0x2:
14243 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14244 break;
14245 case 0x3:
14246 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14247 break;
14248 case 0x4:
14249 Pred = ICmpInst::ICMP_EQ;
14250 break;
14251 case 0x5:
14252 Pred = ICmpInst::ICMP_NE;
14253 break;
14254 case 0x6:
14255 return llvm::Constant::getNullValue(Ty); // FALSE
14256 case 0x7:
14257 return llvm::Constant::getAllOnesValue(Ty); // TRUE
 // Imm was masked with 0x7 above, so every value is covered by a case.
14258 default:
14259 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14260 }
14261
14262 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14263 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14264 return Res;
14265}
14266
// Selects between Op0 and Op1 under an integer mask. A constant all-ones mask
// returns Op0 directly without emitting a select; otherwise the mask is
// converted to an i1 vector with one lane per element of Op0 and used as the
// select condition.
// NOTE(review): the first line of this signature is missing from the listing.
14268 Value *Mask, Value *Op0, Value *Op1) {
14269
14270 // If the mask is all ones just return first argument.
14271 if (const auto *C = dyn_cast<Constant>(Mask))
14272 if (C->isAllOnesValue())
14273 return Op0;
14274
14275 Mask = getMaskVecValue(
14276 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14277
14278 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14279}
14280
// Selects between Op0 and Op1 using only bit 0 of an integer mask. A constant
// all-ones mask returns Op0 directly; otherwise the mask is bitcast to a
// vector of i1 and its lane 0 becomes the select condition.
// NOTE(review): the first line of this signature is missing from the listing.
14282 Value *Mask, Value *Op0, Value *Op1) {
14283 // If the mask is all ones just return first argument.
14284 if (const auto *C = dyn_cast<Constant>(Mask))
14285 if (C->isAllOnesValue())
14286 return Op0;
14287
14288 auto *MaskTy = llvm::FixedVectorType::get(
14289 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14290 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14291 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14292 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14293}
14294
// Turns an i1-vector compare result into an integer mask value.
//
// If MaskIn is non-null and is not a constant all-ones value, Cmp is first
// ANDed with the i1 vector derived from MaskIn. A result with fewer than 8
// lanes is widened to 8 lanes, and the vector is finally bitcast to an integer
// of max(NumElts, 8) bits.
// NOTE(review): the first line of this signature is missing from the listing.
14296 unsigned NumElts, Value *MaskIn) {
14297 if (MaskIn) {
14298 const auto *C = dyn_cast<Constant>(MaskIn);
14299 if (!C || !C->isAllOnesValue())
14300 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14301 }
14302
14303 if (NumElts < 8) {
 // Lanes [0, NumElts) keep the compare bits. The remaining indices fall in
 // [NumElts, 2*NumElts), which addresses the second shuffle operand (the
 // null vector), so the padding lanes are zero.
14304 int Indices[8];
14305 for (unsigned i = 0; i != NumElts; ++i)
14306 Indices[i] = i;
14307 for (unsigned i = NumElts; i != 8; ++i)
14308 Indices[i] = i % NumElts + NumElts;
14309 Cmp = CGF.Builder.CreateShuffleVector(
14310 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14311 }
14312
14313 return CGF.Builder.CreateBitCast(Cmp,
14314 IntegerType::get(CGF.getLLVMContext(),
14315 std::max(NumElts, 8U)));
14316}
14317
// Emits an integer vector compare of Ops[0] against Ops[1] and returns it as
// an integer mask via EmitX86MaskedCompareResult.
//
// CC selects the comparison: 0 ==, 1 <, 2 <=, 3 always false, 4 !=, 5 >=,
// 6 >, 7 always true. Signed chooses signed or unsigned ordered predicates.
// Ops holds either 2 or 4 values; with 4, Ops[3] is passed on as the incoming
// mask.
// NOTE(review): the first line of this signature is missing from the listing.
14319 bool Signed, ArrayRef<Value *> Ops) {
14320 assert((Ops.size() == 2 || Ops.size() == 4) &&
14321 "Unexpected number of arguments");
14322 unsigned NumElts =
14323 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14324 Value *Cmp;
14325
 // CC 3 and 7 need no compare instruction: the result is a constant i1
 // vector of NumElts lanes.
14326 if (CC == 3) {
14327 Cmp = Constant::getNullValue(
14328 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14329 } else if (CC == 7) {
14330 Cmp = Constant::getAllOnesValue(
14331 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14332 } else {
14333 ICmpInst::Predicate Pred;
14334 switch (CC) {
14335 default: llvm_unreachable("Unknown condition code");
14336 case 0: Pred = ICmpInst::ICMP_EQ; break;
14337 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14338 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14339 case 4: Pred = ICmpInst::ICMP_NE; break;
14340 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14341 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14342 }
14343 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14344 }
14345
14346 Value *MaskIn = nullptr;
14347 if (Ops.size() == 4)
14348 MaskIn = Ops[3];
14349
14350 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14351}
14352
// Compares In against a zero value of the same type using condition code 1
// with Signed = true (a signed less-than), with no incoming mask, and returns
// the resulting integer mask.
// NOTE(review): the signature line of this definition is missing from the
// listing.
14354 Value *Zero = Constant::getNullValue(In->getType());
14355 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14356}
14357
// Converts the integer vector in Ops[0] to the floating-point type of Ops[1],
// then selects between the converted value and Ops[1] under the mask in
// Ops[2]. Ops[3] is the constant rounding operand.
// NOTE(review): the first line of this signature is missing from the listing.
14359 ArrayRef<Value *> Ops, bool IsSigned) {
14360 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14361 llvm::Type *Ty = Ops[1]->getType();
14362
14363 Value *Res;
 // Any rounding operand other than 4 goes through the x86 rounding
 // intrinsics, which receive Ops[3]; 4 uses a plain sitofp/uitofp.
14364 if (Rnd != 4) {
14365 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14366 : Intrinsic::x86_avx512_uitofp_round;
14367 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14368 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14369 } else {
 // The RAII object is scoped to this branch only.
14370 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14371 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14372 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14373 }
14374
14375 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14376}
14377
14378// Lowers X86 FMA intrinsics to IR.
// Steps, in order:
//  1. Map BuiltinID to a target intrinsic (IID) and note whether the third
//     operand must be negated (the vfmsub*/vfmsubadd* builtins).
//  2. Emit either the target intrinsic with the rounding operand Ops.back(),
//     or a generic fma / constrained fma.
//  3. For the *_mask, *_maskz and *_mask3 builtins, select between the result
//     and a pass-through value under the mask in Ops[3].
// NOTE(review): the first line of this signature is missing from the listing.
14380 ArrayRef<Value *> Ops, unsigned BuiltinID,
14381 bool IsAddSub) {
14382
14383 bool Subtract = false;
14384 Intrinsic::ID IID = Intrinsic::not_intrinsic;
 // Builtins not listed here leave IID as not_intrinsic and Subtract false.
 // NOTE(review): the 512-bit cases spell the fallthrough as [[fallthrough]]
 // while the 256-bit cases use LLVM_FALLTHROUGH; consider unifying them.
14385 switch (BuiltinID) {
14386 default: break;
14387 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14388 Subtract = true;
14389 [[fallthrough]];
14390 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14391 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14392 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14393 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14394 break;
14395 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14396 Subtract = true;
14397 [[fallthrough]];
14398 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14399 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14400 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14401 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14402 break;
14403 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14404 Subtract = true;
14405 [[fallthrough]];
14406 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14407 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14408 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14409 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14410 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14411 Subtract = true;
14412 [[fallthrough]];
14413 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14414 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14415 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14416 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14417 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14418 Subtract = true;
14419 [[fallthrough]];
14420 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14421 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14422 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14423 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14424 break;
14425 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14426 Subtract = true;
14427 [[fallthrough]];
14428 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14429 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14430 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14431 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14432 break;
14433 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14434 Subtract = true;
14435 LLVM_FALLTHROUGH;
14436 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14437 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14438 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14439 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14440 break;
14441 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14442 Subtract = true;
14443 LLVM_FALLTHROUGH;
14444 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14445 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14446 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14447 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14448 break;
14449 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14450 Subtract = true;
14451 LLVM_FALLTHROUGH;
14452 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14453 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14454 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14455 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14456 break;
14457 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14458 Subtract = true;
14459 LLVM_FALLTHROUGH;
14460 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14461 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14462 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14463 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14464 break;
14465 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14466 Subtract = true;
14467 LLVM_FALLTHROUGH;
14468 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14469 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14470 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14471 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14472 break;
14473 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14474 Subtract = true;
14475 LLVM_FALLTHROUGH;
14476 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14477 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14478 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14479 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14480 break;
14481 }
14482
14483 Value *A = Ops[0];
14484 Value *B = Ops[1];
14485 Value *C = Ops[2];
14486
 // The subtract forms are expressed as an add with a negated third operand.
14487 if (Subtract)
14488 C = CGF.Builder.CreateFNeg(C);
14489
14490 Value *Res;
14491
14492 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
 // The target intrinsic is used when one was selected above and either the
 // rounding operand differs from 4 or IsAddSub is set; everything else
 // becomes a generic fma.
14493 if (IID != Intrinsic::not_intrinsic &&
14494 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14495 IsAddSub)) {
14496 Function *Intr = CGF.CGM.getIntrinsic(IID);
14497 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14498 } else {
14499 llvm::Type *Ty = A->getType();
14500 Function *FMA;
14501 if (CGF.Builder.getIsFPConstrained()) {
14502 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14503 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14504 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14505 } else {
14506 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14507 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14508 }
14509 }
14510
14511 // Handle any required masking.
 // Pass-through value per builtin suffix: _mask keeps Ops[0], _maskz uses
 // zero, _mask3 keeps the original (un-negated) Ops[2]. Builtins in none of
 // these groups return Res unmasked.
14512 Value *MaskFalseVal = nullptr;
14513 switch (BuiltinID) {
14514 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14515 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14516 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14517 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14518 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14519 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14520 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14521 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14522 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14523 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14524 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14525 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14526 MaskFalseVal = Ops[0];
14527 break;
14528 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14529 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14530 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14531 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14532 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14533 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14534 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14535 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14536 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14537 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14538 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14539 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14540 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14541 break;
14542 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14543 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14544 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14545 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14546 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14547 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14548 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14549 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14550 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14551 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14552 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14553 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14554 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14555 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14556 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14557 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14558 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14559 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14560 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14561 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14562 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14563 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14564 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14565 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14566 MaskFalseVal = Ops[2];
14567 break;
14568 }
14569
14570 if (MaskFalseVal)
14571 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14572
14573 return Res;
14574}
14575
// Emits a scalar fused multiply-add on lane 0 of Ops[0], Ops[1] and Ops[2] and
// inserts the result into lane 0 of Upper.
//
// Ops[4], when present, is the constant rounding operand (4 is assumed when it
// is absent). Ops[3], when present, is a mask whose bit 0 chooses between the
// FMA result and a pass-through: zero if ZeroMask, otherwise lane 0 of
// Ops[PTIdx]. NegAcc negates Ops[2] before the multiply-add.
// Ops is mutable: entries 0..2 are overwritten with their lane-0 scalars.
// NOTE(review): the first line of this signature is missing from the listing.
14577 MutableArrayRef<Value *> Ops, Value *Upper,
14578 bool ZeroMask = false, unsigned PTIdx = 0,
14579 bool NegAcc = false) {
14580 unsigned Rnd = 4;
14581 if (Ops.size() > 4)
14582 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14583
14584 if (NegAcc)
14585 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14586
14587 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14588 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14589 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14590 Value *Res;
 // A rounding operand other than 4 uses the x86 scalar FMA intrinsic chosen
 // by the scalar bit width; otherwise a constrained or plain fma is emitted.
14591 if (Rnd != 4) {
14592 Intrinsic::ID IID;
14593
14594 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14595 case 16:
14596 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14597 break;
14598 case 32:
14599 IID = Intrinsic::x86_avx512_vfmadd_f32;
14600 break;
14601 case 64:
14602 IID = Intrinsic::x86_avx512_vfmadd_f64;
14603 break;
14604 default:
14605 llvm_unreachable("Unexpected size");
14606 }
14607 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14608 {Ops[0], Ops[1], Ops[2], Ops[4]});
14609 } else if (CGF.Builder.getIsFPConstrained()) {
14610 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14611 Function *FMA = CGF.CGM.getIntrinsic(
14612 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14613 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14614 } else {
14615 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14616 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14617 }
14618 // If we have more than 3 arguments, we need to do masking.
14619 if (Ops.size() > 3) {
 // Ops[PTIdx] is already a lane-0 scalar for PTIdx in 0..2 (see above).
14620 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14621 : Ops[PTIdx];
14622
14623 // If we negated the accumulator and it is the PassThru value we need to
14624 // bypass the negate. Conveniently Upper should be the same thing in this
14625 // case.
14626 if (NegAcc && PTIdx == 2)
14627 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14628
14629 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14630 }
14631 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14632}
14633
14634static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14635 ArrayRef<Value *> Ops) {
14636 llvm::Type *Ty = Ops[0]->getType();
14637 // Arguments have a vXi32 type so cast to vXi64.
14638 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14639 Ty->getPrimitiveSizeInBits() / 64);
14640 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14641 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14642
14643 if (IsSigned) {
14644 // Shift left then arithmetic shift right.
14645 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14646 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14647 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14648 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14649 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14650 } else {
14651 // Clear the upper bits.
14652 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14653 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14654 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14655 }
14656
14657 return CGF.Builder.CreateMul(LHS, RHS);
14658}
14659
14660// Emit a masked pternlog intrinsic. This only exists because the header has to
14661// use a macro and we aren't able to pass the input argument to a pternlog
14662// builtin and a select builtin without evaluating it twice.
14663static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14664 ArrayRef<Value *> Ops) {
14665 llvm::Type *Ty = Ops[0]->getType();
14666
14667 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14668 unsigned EltWidth = Ty->getScalarSizeInBits();
14669 Intrinsic::ID IID;
14670 if (VecWidth == 128 && EltWidth == 32)
14671 IID = Intrinsic::x86_avx512_pternlog_d_128;
14672 else if (VecWidth == 256 && EltWidth == 32)
14673 IID = Intrinsic::x86_avx512_pternlog_d_256;
14674 else if (VecWidth == 512 && EltWidth == 32)
14675 IID = Intrinsic::x86_avx512_pternlog_d_512;
14676 else if (VecWidth == 128 && EltWidth == 64)
14677 IID = Intrinsic::x86_avx512_pternlog_q_128;
14678 else if (VecWidth == 256 && EltWidth == 64)
14679 IID = Intrinsic::x86_avx512_pternlog_q_256;
14680 else if (VecWidth == 512 && EltWidth == 64)
14681 IID = Intrinsic::x86_avx512_pternlog_q_512;
14682 else
14683 llvm_unreachable("Unexpected intrinsic");
14684
14685 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14686 Ops.drop_back());
14687 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14688 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14689}
14690
14692 llvm::Type *DstTy) {
// NOTE(review): the first line of this signature is absent from this listing
// (numbering jumps from 14690 to 14692). The call site in this file is
// EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())), so CGF and Op
// used below are presumably the leading parameters -- confirm against the
// full file.
//
// Turns the mask operand Op into a vector with one lane per element of DstTy
// (via getMaskVecValue) and sign-extends that vector to DstTy.
14693 unsigned NumberOfElements =
14694 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
// DstTy must be a fixed-width vector; the cast<> above asserts otherwise.
14695 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14696 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14697}
14698
14699Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14700 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14701 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14702 return EmitX86CpuIs(CPUStr);
14703}
14704
14705// Convert F16 halfs to floats.
14708 llvm::Type *DstTy) {
// NOTE(review): the leading signature lines are absent from this listing
// (numbering jumps from 14705 to 14708); CGF and Ops used below are declared
// there.
//
// Operand layout, as read by the code below:
//   Ops[0]  source vector holding the half values as integers
//   Ops[1]  pass-through value for the final select (3- and 4-operand forms)
//   Ops[2]  mask for the final select (3- and 4-operand forms)
//   Ops[3]  constant immediate (4-operand form only)
// Returns the fp-extended value, select-masked when a mask operand exists.
14709 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14710 "Unknown cvtph2ps intrinsic");
14711
14712 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
// An immediate other than 4 keeps the target intrinsic call, forwarding all
// four operands unchanged.
14713 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14714 Function *F =
14715 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14716 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14717 }
14718
14719 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14720 Value *Src = Ops[0];
14721
14722 // Extract the subvector.
// Only reached when the source has a different element count than the
// destination; the sole supported case keeps source elements 0..3.
14723 if (NumDstElts !=
14724 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14725 assert(NumDstElts == 4 && "Unexpected vector size");
14726 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14727 }
14728
14729 // Bitcast from vXi16 to vXf16.
14730 auto *HalfTy = llvm::FixedVectorType::get(
14731 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14732 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14733
14734 // Perform the fp-extension.
14735 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14736
// Masked forms: blend the converted value with the pass-through operand,
// using Ops[2] as the mask.
14737 if (Ops.size() >= 3)
14738 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14739 return Res;
14740}
14741
// Emits a comparison of one field of the runtime's __cpu_model global against
// the enum value that CPUStr names, and returns the resulting i1 value.
//
// CPUStr is looked up in the tables of X86TargetParser.def: vendor names
// select field 0, CPU type names (and aliases) field 1, and CPU subtype names
// (and aliases) field 2. An unknown name trips the assert below.
14742Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14743
14744 llvm::Type *Int32Ty = Builder.getInt32Ty();
14745
14746 // Matching the struct layout from the compiler-rt/libgcc structure that is
14747 // filled in:
14748 // unsigned int __cpu_vendor;
14749 // unsigned int __cpu_type;
14750 // unsigned int __cpu_subtype;
14751 // unsigned int __cpu_features[1];
14752 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14753 llvm::ArrayType::get(Int32Ty, 1));
14754
14755 // Grab the global __cpu_model.
14756 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14757 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14758
14759 // Calculate the index needed to access the correct field based on the
14760 // range. Also adjust the expected value.
// Index is the struct field number and Value the enum constant expected in
// that field. The .Case entries are generated by expanding the macros below
// over X86TargetParser.def; the {0, 0} default marks "not found".
14761 unsigned Index;
14762 unsigned Value;
14763 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14764#define X86_VENDOR(ENUM, STRING) \
14765 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14766#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14767 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14768#define X86_CPU_TYPE(ENUM, STR) \
14769 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14770#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14771 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14772#define X86_CPU_SUBTYPE(ENUM, STR) \
14773 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14774#include "llvm/TargetParser/X86TargetParser.def"
14775 .Default({0, 0});
14776 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14777
14778 // Grab the appropriate field from __cpu_model.
14779 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14780 ConstantInt::get(Int32Ty, Index)};
14781 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14782 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
// NOTE(review): the line carrying the alignment argument of the load above is
// absent from this listing (numbering jumps from 14782 to 14784).
14784
14785 // Check the value of the field against the requested value.
14786 return Builder.CreateICmpEQ(CpuValue,
14787 llvm::ConstantInt::get(Int32Ty, Value));
14788}
14789
14790Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14791 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14792 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14793 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14794 return Builder.getFalse();
14795 return EmitX86CpuSupports(FeatureStr);
14796}
14797
14798Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14799 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14800}
14801
// Emits the runtime test that every bit set in FeatureMask is also set in the
// CPU feature words published by the runtime, and returns the combined i1.
//
// FeatureMask[0] is checked against __cpu_model.__cpu_features[0]; words 1..3
// are checked against elements 0..2 of the __cpu_features2 array. Words that
// are zero emit no load or compare. Result starts as constant true, so an
// all-zero mask returns true.
14802llvm::Value *
14803CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14804 Value *Result = Builder.getTrue();
14805 if (FeatureMask[0] != 0) {
14806 // Matching the struct layout from the compiler-rt/libgcc structure that is
14807 // filled in:
14808 // unsigned int __cpu_vendor;
14809 // unsigned int __cpu_type;
14810 // unsigned int __cpu_subtype;
14811 // unsigned int __cpu_features[1];
14812 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14813 llvm::ArrayType::get(Int32Ty, 1));
14814
14815 // Grab the global __cpu_model.
14816 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14817 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14818
14819 // Grab the first (0th) element from the field __cpu_features off of the
14820 // global in the struct STy.
14821 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14822 Builder.getInt32(0)};
14823 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14824 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
// NOTE(review): the line carrying the alignment argument of the load above is
// absent from this listing (numbering jumps from 14824 to 14826).
14826
14827 // Check the value of the bit corresponding to the feature requested.
// (Features & Mask) == Mask: all requested bits must be set, not just one.
14828 Value *Mask = Builder.getInt32(FeatureMask[0]);
14829 Value *Bitset = Builder.CreateAnd(Features, Mask);
14830 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14831 Result = Builder.CreateAnd(Result, Cmp);
14832 }
14833
// The __cpu_features2 global is requested unconditionally, even when none of
// the mask words 1..3 is set and the loop below emits nothing.
14834 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14835 llvm::Constant *CpuFeatures2 =
14836 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14837 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
// Mask word i maps to array element i - 1.
14838 for (int i = 1; i != 4; ++i) {
14839 const uint32_t M = FeatureMask[i];
14840 if (!M)
14841 continue;
14842 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14843 Value *Features = Builder.CreateAlignedLoad(
14844 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
// NOTE(review): the line carrying the alignment argument of the load above is
// absent from this listing (numbering jumps from 14844 to 14846).
14846 // Check the value of the bit corresponding to the feature requested.
14847 Value *Mask = Builder.getInt32(M);
14848 Value *Bitset = Builder.CreateAnd(Features, Mask);
14849 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14850 Result = Builder.CreateAnd(Result, Cmp);
14851 }
14852
14853 return Result;
14854}
14855
14856Value *CodeGenFunction::EmitAArch64CpuInit() {
14857 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14858 llvm::FunctionCallee Func =
14859 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14860 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14861 cast<llvm::GlobalValue>(Func.getCallee())
14862 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14863 return Builder.CreateCall(Func);
14864}
14865
// NOTE(review): the signature line of this function is absent from this
// listing (numbering jumps from 14865 to 14867); what follows is its body.
//
// Emits a call to the runtime routine __init_riscv_feature_bits, declared as
// void(void *), passing a null pointer as the only argument. The declaration
// is marked dso_local with the default DLL storage class. Returns the call.
14867 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14868 llvm::FunctionCallee Func =
14869 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14870 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14871 CalleeGV->setDSOLocal(true);
14872 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14873 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14874}
14875
14876Value *CodeGenFunction::EmitX86CpuInit() {
14877 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14878 /*Variadic*/ false);
14879 llvm::FunctionCallee Func =
14880 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14881 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14882 cast<llvm::GlobalValue>(Func.getCallee())
14883 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14884 return Builder.CreateCall(Func);
14885}
14886
14887Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14888 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14889 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14891 ArgStr.split(Features, "+");
14892 for (auto &Feature : Features) {
14893 Feature = Feature.trim();
14894 if (!llvm::AArch64::parseFMVExtension(Feature))
14895 return Builder.getFalse();
14896 if (Feature != "default")
14897 Features.push_back(Feature);
14898 }
14899 return EmitAArch64CpuSupports(Features);
14900}
14901
// Emits the runtime test that every bit of the mask computed by
// llvm::AArch64::getCpuSupportsMask(FeaturesStrs) is set in the 64-bit
// features word of the runtime's __aarch64_cpu_features global, and returns
// the resulting i1. A zero mask emits no load and returns constant true.
14902llvm::Value *
14903CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14904 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14905 Value *Result = Builder.getTrue();
14906 if (FeaturesMask != 0) {
14907 // Get features from structure in runtime library
14908 // struct {
14909 // unsigned long long features;
14910 // } __aarch64_cpu_features;
14911 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14912 llvm::Constant *AArch64CPUFeatures =
14913 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14914 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
// Address of field 0 (the features word) of the global.
14915 llvm::Value *CpuFeatures = Builder.CreateGEP(
14916 STy, AArch64CPUFeatures,
14917 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14918 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
// NOTE(review): the line carrying the alignment argument of the load above is
// absent from this listing (numbering jumps from 14918 to 14920).
// (Features & Mask) == Mask: all requested feature bits must be present.
14920 Value *Mask = Builder.getInt64(FeaturesMask);
14921 Value *Bitset = Builder.CreateAnd(Features, Mask);
14922 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14923 Result = Builder.CreateAnd(Result, Cmp);
14924 }
14925 return Result;
14926}
14927
// NOTE(review): the signature line of this function is absent from this
// listing (numbering jumps from 14927 to 14929); what follows is its body,
// which reads a call expression E.
//
// Takes the first call argument (parentheses and casts stripped), which must
// be a string literal, and returns constant false when the target does not
// validate that feature string. Otherwise the single name is wrapped in an
// ArrayRef and forwarded to the feature-list form of EmitRISCVCpuSupports.
14929
14930 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14931 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14932 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14933 return Builder.getFalse();
14934
14935 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
14936}
14937
14938static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
14939 CodeGenModule &CGM) {
14940 llvm::Type *Int32Ty = Builder.getInt32Ty();
14941 llvm::Type *Int64Ty = Builder.getInt64Ty();
14942 llvm::ArrayType *ArrayOfInt64Ty =
14943 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
14944 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14945 llvm::Constant *RISCVFeaturesBits =
14946 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
14947 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
14948 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
14949 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
14950 IndexVal};
14951 Value *Ptr =
14952 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
14953 Value *FeaturesBit =
14954 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
14955 return FeaturesBit;
14956}
14957
// NOTE(review): the signature line of this function is absent from this
// listing (numbering jumps from 14957 to 14959); what follows is its body,
// which reads a list of feature names called FeaturesStrs.
//
// Builds one 64-bit required-bits mask per feature group, then emits, for
// every group with a non-zero mask, a load of that group's word (via
// loadRISCVFeatureBits) and an "all required bits set" comparison. The
// per-group results are AND-ed together and returned.
14959 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
14960 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
14961
// Translate each feature name into a (group, bit) pair and accumulate it.
14962 for (auto Feat : FeaturesStrs) {
14963 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
14964
14965 // If there isn't BitPos for this feature, skip this version.
14966 // It also report the warning to user during compilation.
// A BitPos of -1 makes the whole check fold to constant false.
14967 if (BitPos == -1)
14968 return Builder.getFalse();
14969
14970 RequireBitMasks[GroupID] |= (1ULL << BitPos);
14971 }
14972
// Result stays null until the first group with required bits is compared.
14973 Value *Result = nullptr;
14974 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
14975 if (RequireBitMasks[Idx] == 0)
14976 continue;
14977
14978 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
14979 Value *Bitset =
14980 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
14981 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
14982 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
14983 }
14984
// Fires when no group ended up with required bits, e.g. for an empty
// FeaturesStrs.
14985 assert(Result && "Should have value here.");
14986
14987 return Result;
14988}
14989
14990Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14991 const CallExpr *E) {
14992 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14993 return EmitX86CpuIs(E);
14994 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14995 return EmitX86CpuSupports(E);
14996 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14997 return EmitX86CpuInit();
14998
14999 // Handle MSVC intrinsics before argument evaluation to prevent double
15000 // evaluation.
15001 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15002 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15003
15005 bool IsMaskFCmp = false;
15006 bool IsConjFMA = false;
15007
15008 // Find out if any arguments are required to be integer constant expressions.
15009 unsigned ICEArguments = 0;
15011 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15012 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15013
15014 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15015 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15016 }
15017
15018 // These exist so that the builtin that takes an immediate can be bounds
15019 // checked by clang to avoid passing bad immediates to the backend. Since
15020 // AVX has a larger immediate than SSE we would need separate builtins to
15021 // do the different bounds checking. Rather than create a clang specific
15022 // SSE only builtin, this implements eight separate builtins to match gcc
15023 // implementation.
15024 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15025 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15026 llvm::Function *F = CGM.getIntrinsic(ID);
15027 return Builder.CreateCall(F, Ops);
15028 };
15029
15030 // For the vector forms of FP comparisons, translate the builtins directly to
15031 // IR.
15032 // TODO: The builtins could be removed if the SSE header files used vector
15033 // extension comparisons directly (vector ordered/unordered may need
15034 // additional support via __builtin_isnan()).
15035 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15036 bool IsSignaling) {
15037 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15038 Value *Cmp;
15039 if (IsSignaling)
15040 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15041 else
15042 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15043 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15044 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15045 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15046 return Builder.CreateBitCast(Sext, FPVecTy);
15047 };
15048
15049 switch (BuiltinID) {
15050 default: return nullptr;
15051 case X86::BI_mm_prefetch: {
15052 Value *Address = Ops[0];
15053 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15054 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15055 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15056 Value *Data = ConstantInt::get(Int32Ty, 1);
15057 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15058 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15059 }
15060 case X86::BI_mm_clflush: {
15061 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15062 Ops[0]);
15063 }
15064 case X86::BI_mm_lfence: {
15065 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15066 }
15067 case X86::BI_mm_mfence: {
15068 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15069 }
15070 case X86::BI_mm_sfence: {
15071 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15072 }
15073 case X86::BI_mm_pause: {
15074 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15075 }
15076 case X86::BI__rdtsc: {
15077 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15078 }
15079 case X86::BI__builtin_ia32_rdtscp: {
15080 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15081 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15082 Ops[0]);
15083 return Builder.CreateExtractValue(Call, 0);
15084 }
15085 case X86::BI__builtin_ia32_lzcnt_u16:
15086 case X86::BI__builtin_ia32_lzcnt_u32:
15087 case X86::BI__builtin_ia32_lzcnt_u64: {
15088 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15089 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15090 }
15091 case X86::BI__builtin_ia32_tzcnt_u16:
15092 case X86::BI__builtin_ia32_tzcnt_u32:
15093 case X86::BI__builtin_ia32_tzcnt_u64: {
15094 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15095 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15096 }
15097 case X86::BI__builtin_ia32_undef128:
15098 case X86::BI__builtin_ia32_undef256:
15099 case X86::BI__builtin_ia32_undef512:
15100 // The x86 definition of "undef" is not the same as the LLVM definition
15101 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15102 // IR optimizer and backend.
15103 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15104 // value, we should use that here instead of a zero.
15105 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15106 case X86::BI__builtin_ia32_vec_ext_v4hi:
15107 case X86::BI__builtin_ia32_vec_ext_v16qi:
15108 case X86::BI__builtin_ia32_vec_ext_v8hi:
15109 case X86::BI__builtin_ia32_vec_ext_v4si:
15110 case X86::BI__builtin_ia32_vec_ext_v4sf:
15111 case X86::BI__builtin_ia32_vec_ext_v2di:
15112 case X86::BI__builtin_ia32_vec_ext_v32qi:
15113 case X86::BI__builtin_ia32_vec_ext_v16hi:
15114 case X86::BI__builtin_ia32_vec_ext_v8si:
15115 case X86::BI__builtin_ia32_vec_ext_v4di: {
15116 unsigned NumElts =
15117 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15118 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15119 Index &= NumElts - 1;
15120 // These builtins exist so we can ensure the index is an ICE and in range.
15121 // Otherwise we could just do this in the header file.
15122 return Builder.CreateExtractElement(Ops[0], Index);
15123 }
15124 case X86::BI__builtin_ia32_vec_set_v4hi:
15125 case X86::BI__builtin_ia32_vec_set_v16qi:
15126 case X86::BI__builtin_ia32_vec_set_v8hi:
15127 case X86::BI__builtin_ia32_vec_set_v4si:
15128 case X86::BI__builtin_ia32_vec_set_v2di:
15129 case X86::BI__builtin_ia32_vec_set_v32qi:
15130 case X86::BI__builtin_ia32_vec_set_v16hi:
15131 case X86::BI__builtin_ia32_vec_set_v8si:
15132 case X86::BI__builtin_ia32_vec_set_v4di: {
15133 unsigned NumElts =
15134 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15135 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15136 Index &= NumElts - 1;
15137 // These builtins exist so we can ensure the index is an ICE and in range.
15138 // Otherwise we could just do this in the header file.
15139 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15140 }
15141 case X86::BI_mm_setcsr:
15142 case X86::BI__builtin_ia32_ldmxcsr: {
15143 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15144 Builder.CreateStore(Ops[0], Tmp);
15145 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15146 Tmp.getPointer());
15147 }
15148 case X86::BI_mm_getcsr:
15149 case X86::BI__builtin_ia32_stmxcsr: {
15151 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15152 Tmp.getPointer());
15153 return Builder.CreateLoad(Tmp, "stmxcsr");
15154 }
15155 case X86::BI__builtin_ia32_xsave:
15156 case X86::BI__builtin_ia32_xsave64:
15157 case X86::BI__builtin_ia32_xrstor:
15158 case X86::BI__builtin_ia32_xrstor64:
15159 case X86::BI__builtin_ia32_xsaveopt:
15160 case X86::BI__builtin_ia32_xsaveopt64:
15161 case X86::BI__builtin_ia32_xrstors:
15162 case X86::BI__builtin_ia32_xrstors64:
15163 case X86::BI__builtin_ia32_xsavec:
15164 case X86::BI__builtin_ia32_xsavec64:
15165 case X86::BI__builtin_ia32_xsaves:
15166 case X86::BI__builtin_ia32_xsaves64:
15167 case X86::BI__builtin_ia32_xsetbv:
15168 case X86::BI_xsetbv: {
15169 Intrinsic::ID ID;
15170#define INTRINSIC_X86_XSAVE_ID(NAME) \
15171 case X86::BI__builtin_ia32_##NAME: \
15172 ID = Intrinsic::x86_##NAME; \
15173 break
15174 switch (BuiltinID) {
15175 default: llvm_unreachable("Unsupported intrinsic!");
15177 INTRINSIC_X86_XSAVE_ID(xsave64);
15178 INTRINSIC_X86_XSAVE_ID(xrstor);
15179 INTRINSIC_X86_XSAVE_ID(xrstor64);
15180 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15181 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15182 INTRINSIC_X86_XSAVE_ID(xrstors);
15183 INTRINSIC_X86_XSAVE_ID(xrstors64);
15184 INTRINSIC_X86_XSAVE_ID(xsavec);
15185 INTRINSIC_X86_XSAVE_ID(xsavec64);
15186 INTRINSIC_X86_XSAVE_ID(xsaves);
15187 INTRINSIC_X86_XSAVE_ID(xsaves64);
15188 INTRINSIC_X86_XSAVE_ID(xsetbv);
15189 case X86::BI_xsetbv:
15190 ID = Intrinsic::x86_xsetbv;
15191 break;
15192 }
15193#undef INTRINSIC_X86_XSAVE_ID
15194 Value *Mhi = Builder.CreateTrunc(
15195 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15196 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15197 Ops[1] = Mhi;
15198 Ops.push_back(Mlo);
15199 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15200 }
15201 case X86::BI__builtin_ia32_xgetbv:
15202 case X86::BI_xgetbv:
15203 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15204 case X86::BI__builtin_ia32_storedqudi128_mask:
15205 case X86::BI__builtin_ia32_storedqusi128_mask:
15206 case X86::BI__builtin_ia32_storedquhi128_mask:
15207 case X86::BI__builtin_ia32_storedquqi128_mask:
15208 case X86::BI__builtin_ia32_storeupd128_mask:
15209 case X86::BI__builtin_ia32_storeups128_mask:
15210 case X86::BI__builtin_ia32_storedqudi256_mask:
15211 case X86::BI__builtin_ia32_storedqusi256_mask:
15212 case X86::BI__builtin_ia32_storedquhi256_mask:
15213 case X86::BI__builtin_ia32_storedquqi256_mask:
15214 case X86::BI__builtin_ia32_storeupd256_mask:
15215 case X86::BI__builtin_ia32_storeups256_mask:
15216 case X86::BI__builtin_ia32_storedqudi512_mask:
15217 case X86::BI__builtin_ia32_storedqusi512_mask:
15218 case X86::BI__builtin_ia32_storedquhi512_mask:
15219 case X86::BI__builtin_ia32_storedquqi512_mask:
15220 case X86::BI__builtin_ia32_storeupd512_mask:
15221 case X86::BI__builtin_ia32_storeups512_mask:
15222 return EmitX86MaskedStore(*this, Ops, Align(1));
15223
15224 case X86::BI__builtin_ia32_storesbf16128_mask:
15225 case X86::BI__builtin_ia32_storesh128_mask:
15226 case X86::BI__builtin_ia32_storess128_mask:
15227 case X86::BI__builtin_ia32_storesd128_mask:
15228 return EmitX86MaskedStore(*this, Ops, Align(1));
15229
15230 case X86::BI__builtin_ia32_cvtmask2b128:
15231 case X86::BI__builtin_ia32_cvtmask2b256:
15232 case X86::BI__builtin_ia32_cvtmask2b512:
15233 case X86::BI__builtin_ia32_cvtmask2w128:
15234 case X86::BI__builtin_ia32_cvtmask2w256:
15235 case X86::BI__builtin_ia32_cvtmask2w512:
15236 case X86::BI__builtin_ia32_cvtmask2d128:
15237 case X86::BI__builtin_ia32_cvtmask2d256:
15238 case X86::BI__builtin_ia32_cvtmask2d512:
15239 case X86::BI__builtin_ia32_cvtmask2q128:
15240 case X86::BI__builtin_ia32_cvtmask2q256:
15241 case X86::BI__builtin_ia32_cvtmask2q512:
15242 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15243
15244 case X86::BI__builtin_ia32_cvtb2mask128:
15245 case X86::BI__builtin_ia32_cvtb2mask256:
15246 case X86::BI__builtin_ia32_cvtb2mask512:
15247 case X86::BI__builtin_ia32_cvtw2mask128:
15248 case X86::BI__builtin_ia32_cvtw2mask256:
15249 case X86::BI__builtin_ia32_cvtw2mask512:
15250 case X86::BI__builtin_ia32_cvtd2mask128:
15251 case X86::BI__builtin_ia32_cvtd2mask256:
15252 case X86::BI__builtin_ia32_cvtd2mask512:
15253 case X86::BI__builtin_ia32_cvtq2mask128:
15254 case X86::BI__builtin_ia32_cvtq2mask256:
15255 case X86::BI__builtin_ia32_cvtq2mask512:
15256 return EmitX86ConvertToMask(*this, Ops[0]);
15257
15258 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15259 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15260 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15261 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15262 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15263 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15264 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15265 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15266 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15267 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15268 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15269 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15270 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15271 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15272 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15273 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15274 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15275 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15276 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15277 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15278 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15279 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15280 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15281 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15282 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15283 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15284
15285 case X86::BI__builtin_ia32_vfmaddss3:
15286 case X86::BI__builtin_ia32_vfmaddsd3:
15287 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15288 case X86::BI__builtin_ia32_vfmaddss3_mask:
15289 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15290 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15291 case X86::BI__builtin_ia32_vfmaddss:
15292 case X86::BI__builtin_ia32_vfmaddsd:
15293 return EmitScalarFMAExpr(*this, E, Ops,
15294 Constant::getNullValue(Ops[0]->getType()));
15295 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15296 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15297 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15298 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15299 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15300 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15301 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15302 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15303 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15304 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15305 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15306 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15307 /*NegAcc*/ true);
15308 case X86::BI__builtin_ia32_vfmaddph:
15309 case X86::BI__builtin_ia32_vfmaddps:
15310 case X86::BI__builtin_ia32_vfmaddpd:
15311 case X86::BI__builtin_ia32_vfmaddph256:
15312 case X86::BI__builtin_ia32_vfmaddps256:
15313 case X86::BI__builtin_ia32_vfmaddpd256:
15314 case X86::BI__builtin_ia32_vfmaddph512_mask:
15315 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15316 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15317 case X86::BI__builtin_ia32_vfmaddnepbh128:
15318 case X86::BI__builtin_ia32_vfmaddnepbh256:
15319 case X86::BI__builtin_ia32_vfmaddnepbh512:
15320 case X86::BI__builtin_ia32_vfmaddps512_mask:
15321 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15322 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15323 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15324 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15325 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15326 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15327 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15328 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15329 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15330 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15331 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15332 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15333 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15334 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15335 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15336 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15337 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15338 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15339 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15340 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15341 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15342 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15343 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15344 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15345 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15346 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15347 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15348 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15349 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15350 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15351 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15352 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15353 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15354 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15355 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15356 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15357 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15358 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15359 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15360 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15361 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15362 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15363 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15364 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15365 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15366 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15367
15368 case X86::BI__builtin_ia32_movdqa32store128_mask:
15369 case X86::BI__builtin_ia32_movdqa64store128_mask:
15370 case X86::BI__builtin_ia32_storeaps128_mask:
15371 case X86::BI__builtin_ia32_storeapd128_mask:
15372 case X86::BI__builtin_ia32_movdqa32store256_mask:
15373 case X86::BI__builtin_ia32_movdqa64store256_mask:
15374 case X86::BI__builtin_ia32_storeaps256_mask:
15375 case X86::BI__builtin_ia32_storeapd256_mask:
15376 case X86::BI__builtin_ia32_movdqa32store512_mask:
15377 case X86::BI__builtin_ia32_movdqa64store512_mask:
15378 case X86::BI__builtin_ia32_storeaps512_mask:
15379 case X86::BI__builtin_ia32_storeapd512_mask:
15380 return EmitX86MaskedStore(
15381 *this, Ops,
15382 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15383
15384 case X86::BI__builtin_ia32_loadups128_mask:
15385 case X86::BI__builtin_ia32_loadups256_mask:
15386 case X86::BI__builtin_ia32_loadups512_mask:
15387 case X86::BI__builtin_ia32_loadupd128_mask:
15388 case X86::BI__builtin_ia32_loadupd256_mask:
15389 case X86::BI__builtin_ia32_loadupd512_mask:
15390 case X86::BI__builtin_ia32_loaddquqi128_mask:
15391 case X86::BI__builtin_ia32_loaddquqi256_mask:
15392 case X86::BI__builtin_ia32_loaddquqi512_mask:
15393 case X86::BI__builtin_ia32_loaddquhi128_mask:
15394 case X86::BI__builtin_ia32_loaddquhi256_mask:
15395 case X86::BI__builtin_ia32_loaddquhi512_mask:
15396 case X86::BI__builtin_ia32_loaddqusi128_mask:
15397 case X86::BI__builtin_ia32_loaddqusi256_mask:
15398 case X86::BI__builtin_ia32_loaddqusi512_mask:
15399 case X86::BI__builtin_ia32_loaddqudi128_mask:
15400 case X86::BI__builtin_ia32_loaddqudi256_mask:
15401 case X86::BI__builtin_ia32_loaddqudi512_mask:
15402 return EmitX86MaskedLoad(*this, Ops, Align(1));
15403
15404 case X86::BI__builtin_ia32_loadsbf16128_mask:
15405 case X86::BI__builtin_ia32_loadsh128_mask:
15406 case X86::BI__builtin_ia32_loadss128_mask:
15407 case X86::BI__builtin_ia32_loadsd128_mask:
15408 return EmitX86MaskedLoad(*this, Ops, Align(1));
15409
15410 case X86::BI__builtin_ia32_loadaps128_mask:
15411 case X86::BI__builtin_ia32_loadaps256_mask:
15412 case X86::BI__builtin_ia32_loadaps512_mask:
15413 case X86::BI__builtin_ia32_loadapd128_mask:
15414 case X86::BI__builtin_ia32_loadapd256_mask:
15415 case X86::BI__builtin_ia32_loadapd512_mask:
15416 case X86::BI__builtin_ia32_movdqa32load128_mask:
15417 case X86::BI__builtin_ia32_movdqa32load256_mask:
15418 case X86::BI__builtin_ia32_movdqa32load512_mask:
15419 case X86::BI__builtin_ia32_movdqa64load128_mask:
15420 case X86::BI__builtin_ia32_movdqa64load256_mask:
15421 case X86::BI__builtin_ia32_movdqa64load512_mask:
15422 return EmitX86MaskedLoad(
15423 *this, Ops,
15424 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15425
15426 case X86::BI__builtin_ia32_expandloaddf128_mask:
15427 case X86::BI__builtin_ia32_expandloaddf256_mask:
15428 case X86::BI__builtin_ia32_expandloaddf512_mask:
15429 case X86::BI__builtin_ia32_expandloadsf128_mask:
15430 case X86::BI__builtin_ia32_expandloadsf256_mask:
15431 case X86::BI__builtin_ia32_expandloadsf512_mask:
15432 case X86::BI__builtin_ia32_expandloaddi128_mask:
15433 case X86::BI__builtin_ia32_expandloaddi256_mask:
15434 case X86::BI__builtin_ia32_expandloaddi512_mask:
15435 case X86::BI__builtin_ia32_expandloadsi128_mask:
15436 case X86::BI__builtin_ia32_expandloadsi256_mask:
15437 case X86::BI__builtin_ia32_expandloadsi512_mask:
15438 case X86::BI__builtin_ia32_expandloadhi128_mask:
15439 case X86::BI__builtin_ia32_expandloadhi256_mask:
15440 case X86::BI__builtin_ia32_expandloadhi512_mask:
15441 case X86::BI__builtin_ia32_expandloadqi128_mask:
15442 case X86::BI__builtin_ia32_expandloadqi256_mask:
15443 case X86::BI__builtin_ia32_expandloadqi512_mask:
15444 return EmitX86ExpandLoad(*this, Ops);
15445
15446 case X86::BI__builtin_ia32_compressstoredf128_mask:
15447 case X86::BI__builtin_ia32_compressstoredf256_mask:
15448 case X86::BI__builtin_ia32_compressstoredf512_mask:
15449 case X86::BI__builtin_ia32_compressstoresf128_mask:
15450 case X86::BI__builtin_ia32_compressstoresf256_mask:
15451 case X86::BI__builtin_ia32_compressstoresf512_mask:
15452 case X86::BI__builtin_ia32_compressstoredi128_mask:
15453 case X86::BI__builtin_ia32_compressstoredi256_mask:
15454 case X86::BI__builtin_ia32_compressstoredi512_mask:
15455 case X86::BI__builtin_ia32_compressstoresi128_mask:
15456 case X86::BI__builtin_ia32_compressstoresi256_mask:
15457 case X86::BI__builtin_ia32_compressstoresi512_mask:
15458 case X86::BI__builtin_ia32_compressstorehi128_mask:
15459 case X86::BI__builtin_ia32_compressstorehi256_mask:
15460 case X86::BI__builtin_ia32_compressstorehi512_mask:
15461 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15462 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15463 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15464 return EmitX86CompressStore(*this, Ops);
15465
15466 case X86::BI__builtin_ia32_expanddf128_mask:
15467 case X86::BI__builtin_ia32_expanddf256_mask:
15468 case X86::BI__builtin_ia32_expanddf512_mask:
15469 case X86::BI__builtin_ia32_expandsf128_mask:
15470 case X86::BI__builtin_ia32_expandsf256_mask:
15471 case X86::BI__builtin_ia32_expandsf512_mask:
15472 case X86::BI__builtin_ia32_expanddi128_mask:
15473 case X86::BI__builtin_ia32_expanddi256_mask:
15474 case X86::BI__builtin_ia32_expanddi512_mask:
15475 case X86::BI__builtin_ia32_expandsi128_mask:
15476 case X86::BI__builtin_ia32_expandsi256_mask:
15477 case X86::BI__builtin_ia32_expandsi512_mask:
15478 case X86::BI__builtin_ia32_expandhi128_mask:
15479 case X86::BI__builtin_ia32_expandhi256_mask:
15480 case X86::BI__builtin_ia32_expandhi512_mask:
15481 case X86::BI__builtin_ia32_expandqi128_mask:
15482 case X86::BI__builtin_ia32_expandqi256_mask:
15483 case X86::BI__builtin_ia32_expandqi512_mask:
15484 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15485
15486 case X86::BI__builtin_ia32_compressdf128_mask:
15487 case X86::BI__builtin_ia32_compressdf256_mask:
15488 case X86::BI__builtin_ia32_compressdf512_mask:
15489 case X86::BI__builtin_ia32_compresssf128_mask:
15490 case X86::BI__builtin_ia32_compresssf256_mask:
15491 case X86::BI__builtin_ia32_compresssf512_mask:
15492 case X86::BI__builtin_ia32_compressdi128_mask:
15493 case X86::BI__builtin_ia32_compressdi256_mask:
15494 case X86::BI__builtin_ia32_compressdi512_mask:
15495 case X86::BI__builtin_ia32_compresssi128_mask:
15496 case X86::BI__builtin_ia32_compresssi256_mask:
15497 case X86::BI__builtin_ia32_compresssi512_mask:
15498 case X86::BI__builtin_ia32_compresshi128_mask:
15499 case X86::BI__builtin_ia32_compresshi256_mask:
15500 case X86::BI__builtin_ia32_compresshi512_mask:
15501 case X86::BI__builtin_ia32_compressqi128_mask:
15502 case X86::BI__builtin_ia32_compressqi256_mask:
15503 case X86::BI__builtin_ia32_compressqi512_mask:
15504 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15505
15506 case X86::BI__builtin_ia32_gather3div2df:
15507 case X86::BI__builtin_ia32_gather3div2di:
15508 case X86::BI__builtin_ia32_gather3div4df:
15509 case X86::BI__builtin_ia32_gather3div4di:
15510 case X86::BI__builtin_ia32_gather3div4sf:
15511 case X86::BI__builtin_ia32_gather3div4si:
15512 case X86::BI__builtin_ia32_gather3div8sf:
15513 case X86::BI__builtin_ia32_gather3div8si:
15514 case X86::BI__builtin_ia32_gather3siv2df:
15515 case X86::BI__builtin_ia32_gather3siv2di:
15516 case X86::BI__builtin_ia32_gather3siv4df:
15517 case X86::BI__builtin_ia32_gather3siv4di:
15518 case X86::BI__builtin_ia32_gather3siv4sf:
15519 case X86::BI__builtin_ia32_gather3siv4si:
15520 case X86::BI__builtin_ia32_gather3siv8sf:
15521 case X86::BI__builtin_ia32_gather3siv8si:
15522 case X86::BI__builtin_ia32_gathersiv8df:
15523 case X86::BI__builtin_ia32_gathersiv16sf:
15524 case X86::BI__builtin_ia32_gatherdiv8df:
15525 case X86::BI__builtin_ia32_gatherdiv16sf:
15526 case X86::BI__builtin_ia32_gathersiv8di:
15527 case X86::BI__builtin_ia32_gathersiv16si:
15528 case X86::BI__builtin_ia32_gatherdiv8di:
15529 case X86::BI__builtin_ia32_gatherdiv16si: {
15530 Intrinsic::ID IID;
15531 switch (BuiltinID) {
15532 default: llvm_unreachable("Unexpected builtin");
15533 case X86::BI__builtin_ia32_gather3div2df:
15534 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15535 break;
15536 case X86::BI__builtin_ia32_gather3div2di:
15537 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15538 break;
15539 case X86::BI__builtin_ia32_gather3div4df:
15540 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15541 break;
15542 case X86::BI__builtin_ia32_gather3div4di:
15543 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15544 break;
15545 case X86::BI__builtin_ia32_gather3div4sf:
15546 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15547 break;
15548 case X86::BI__builtin_ia32_gather3div4si:
15549 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15550 break;
15551 case X86::BI__builtin_ia32_gather3div8sf:
15552 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15553 break;
15554 case X86::BI__builtin_ia32_gather3div8si:
15555 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15556 break;
15557 case X86::BI__builtin_ia32_gather3siv2df:
15558 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15559 break;
15560 case X86::BI__builtin_ia32_gather3siv2di:
15561 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15562 break;
15563 case X86::BI__builtin_ia32_gather3siv4df:
15564 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15565 break;
15566 case X86::BI__builtin_ia32_gather3siv4di:
15567 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15568 break;
15569 case X86::BI__builtin_ia32_gather3siv4sf:
15570 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15571 break;
15572 case X86::BI__builtin_ia32_gather3siv4si:
15573 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15574 break;
15575 case X86::BI__builtin_ia32_gather3siv8sf:
15576 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15577 break;
15578 case X86::BI__builtin_ia32_gather3siv8si:
15579 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15580 break;
15581 case X86::BI__builtin_ia32_gathersiv8df:
15582 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15583 break;
15584 case X86::BI__builtin_ia32_gathersiv16sf:
15585 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15586 break;
15587 case X86::BI__builtin_ia32_gatherdiv8df:
15588 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15589 break;
15590 case X86::BI__builtin_ia32_gatherdiv16sf:
15591 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15592 break;
15593 case X86::BI__builtin_ia32_gathersiv8di:
15594 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15595 break;
15596 case X86::BI__builtin_ia32_gathersiv16si:
15597 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15598 break;
15599 case X86::BI__builtin_ia32_gatherdiv8di:
15600 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15601 break;
15602 case X86::BI__builtin_ia32_gatherdiv16si:
15603 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15604 break;
15605 }
15606
15607 unsigned MinElts = std::min(
15608 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15609 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15610 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15611 Function *Intr = CGM.getIntrinsic(IID);
15612 return Builder.CreateCall(Intr, Ops);
15613 }
15614
15615 case X86::BI__builtin_ia32_scattersiv8df:
15616 case X86::BI__builtin_ia32_scattersiv16sf:
15617 case X86::BI__builtin_ia32_scatterdiv8df:
15618 case X86::BI__builtin_ia32_scatterdiv16sf:
15619 case X86::BI__builtin_ia32_scattersiv8di:
15620 case X86::BI__builtin_ia32_scattersiv16si:
15621 case X86::BI__builtin_ia32_scatterdiv8di:
15622 case X86::BI__builtin_ia32_scatterdiv16si:
15623 case X86::BI__builtin_ia32_scatterdiv2df:
15624 case X86::BI__builtin_ia32_scatterdiv2di:
15625 case X86::BI__builtin_ia32_scatterdiv4df:
15626 case X86::BI__builtin_ia32_scatterdiv4di:
15627 case X86::BI__builtin_ia32_scatterdiv4sf:
15628 case X86::BI__builtin_ia32_scatterdiv4si:
15629 case X86::BI__builtin_ia32_scatterdiv8sf:
15630 case X86::BI__builtin_ia32_scatterdiv8si:
15631 case X86::BI__builtin_ia32_scattersiv2df:
15632 case X86::BI__builtin_ia32_scattersiv2di:
15633 case X86::BI__builtin_ia32_scattersiv4df:
15634 case X86::BI__builtin_ia32_scattersiv4di:
15635 case X86::BI__builtin_ia32_scattersiv4sf:
15636 case X86::BI__builtin_ia32_scattersiv4si:
15637 case X86::BI__builtin_ia32_scattersiv8sf:
15638 case X86::BI__builtin_ia32_scattersiv8si: {
15639 Intrinsic::ID IID;
15640 switch (BuiltinID) {
15641 default: llvm_unreachable("Unexpected builtin");
15642 case X86::BI__builtin_ia32_scattersiv8df:
15643 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15644 break;
15645 case X86::BI__builtin_ia32_scattersiv16sf:
15646 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15647 break;
15648 case X86::BI__builtin_ia32_scatterdiv8df:
15649 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15650 break;
15651 case X86::BI__builtin_ia32_scatterdiv16sf:
15652 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15653 break;
15654 case X86::BI__builtin_ia32_scattersiv8di:
15655 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15656 break;
15657 case X86::BI__builtin_ia32_scattersiv16si:
15658 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15659 break;
15660 case X86::BI__builtin_ia32_scatterdiv8di:
15661 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15662 break;
15663 case X86::BI__builtin_ia32_scatterdiv16si:
15664 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15665 break;
15666 case X86::BI__builtin_ia32_scatterdiv2df:
15667 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15668 break;
15669 case X86::BI__builtin_ia32_scatterdiv2di:
15670 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15671 break;
15672 case X86::BI__builtin_ia32_scatterdiv4df:
15673 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15674 break;
15675 case X86::BI__builtin_ia32_scatterdiv4di:
15676 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15677 break;
15678 case X86::BI__builtin_ia32_scatterdiv4sf:
15679 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15680 break;
15681 case X86::BI__builtin_ia32_scatterdiv4si:
15682 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15683 break;
15684 case X86::BI__builtin_ia32_scatterdiv8sf:
15685 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15686 break;
15687 case X86::BI__builtin_ia32_scatterdiv8si:
15688 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15689 break;
15690 case X86::BI__builtin_ia32_scattersiv2df:
15691 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15692 break;
15693 case X86::BI__builtin_ia32_scattersiv2di:
15694 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15695 break;
15696 case X86::BI__builtin_ia32_scattersiv4df:
15697 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15698 break;
15699 case X86::BI__builtin_ia32_scattersiv4di:
15700 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15701 break;
15702 case X86::BI__builtin_ia32_scattersiv4sf:
15703 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15704 break;
15705 case X86::BI__builtin_ia32_scattersiv4si:
15706 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15707 break;
15708 case X86::BI__builtin_ia32_scattersiv8sf:
15709 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15710 break;
15711 case X86::BI__builtin_ia32_scattersiv8si:
15712 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15713 break;
15714 }
15715
15716 unsigned MinElts = std::min(
15717 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15718 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15719 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15720 Function *Intr = CGM.getIntrinsic(IID);
15721 return Builder.CreateCall(Intr, Ops);
15722 }
15723
15724 case X86::BI__builtin_ia32_vextractf128_pd256:
15725 case X86::BI__builtin_ia32_vextractf128_ps256:
15726 case X86::BI__builtin_ia32_vextractf128_si256:
15727 case X86::BI__builtin_ia32_extract128i256:
15728 case X86::BI__builtin_ia32_extractf64x4_mask:
15729 case X86::BI__builtin_ia32_extractf32x4_mask:
15730 case X86::BI__builtin_ia32_extracti64x4_mask:
15731 case X86::BI__builtin_ia32_extracti32x4_mask:
15732 case X86::BI__builtin_ia32_extractf32x8_mask:
15733 case X86::BI__builtin_ia32_extracti32x8_mask:
15734 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15735 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15736 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15737 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15738 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15739 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15740 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15741 unsigned NumElts = DstTy->getNumElements();
15742 unsigned SrcNumElts =
15743 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15744 unsigned SubVectors = SrcNumElts / NumElts;
15745 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15746 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15747 Index &= SubVectors - 1; // Remove any extra bits.
15748 Index *= NumElts;
15749
15750 int Indices[16];
15751 for (unsigned i = 0; i != NumElts; ++i)
15752 Indices[i] = i + Index;
15753
15754 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15755 "extract");
15756
15757 if (Ops.size() == 4)
15758 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15759
15760 return Res;
15761 }
15762 case X86::BI__builtin_ia32_vinsertf128_pd256:
15763 case X86::BI__builtin_ia32_vinsertf128_ps256:
15764 case X86::BI__builtin_ia32_vinsertf128_si256:
15765 case X86::BI__builtin_ia32_insert128i256:
15766 case X86::BI__builtin_ia32_insertf64x4:
15767 case X86::BI__builtin_ia32_insertf32x4:
15768 case X86::BI__builtin_ia32_inserti64x4:
15769 case X86::BI__builtin_ia32_inserti32x4:
15770 case X86::BI__builtin_ia32_insertf32x8:
15771 case X86::BI__builtin_ia32_inserti32x8:
15772 case X86::BI__builtin_ia32_insertf32x4_256:
15773 case X86::BI__builtin_ia32_inserti32x4_256:
15774 case X86::BI__builtin_ia32_insertf64x2_256:
15775 case X86::BI__builtin_ia32_inserti64x2_256:
15776 case X86::BI__builtin_ia32_insertf64x2_512:
15777 case X86::BI__builtin_ia32_inserti64x2_512: {
15778 unsigned DstNumElts =
15779 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15780 unsigned SrcNumElts =
15781 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15782 unsigned SubVectors = DstNumElts / SrcNumElts;
15783 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15784 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15785 Index &= SubVectors - 1; // Remove any extra bits.
15786 Index *= SrcNumElts;
15787
15788 int Indices[16];
15789 for (unsigned i = 0; i != DstNumElts; ++i)
15790 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15791
15792 Value *Op1 = Builder.CreateShuffleVector(
15793 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15794
15795 for (unsigned i = 0; i != DstNumElts; ++i) {
15796 if (i >= Index && i < (Index + SrcNumElts))
15797 Indices[i] = (i - Index) + DstNumElts;
15798 else
15799 Indices[i] = i;
15800 }
15801
15802 return Builder.CreateShuffleVector(Ops[0], Op1,
15803 ArrayRef(Indices, DstNumElts), "insert");
15804 }
15805 case X86::BI__builtin_ia32_pmovqd512_mask:
15806 case X86::BI__builtin_ia32_pmovwb512_mask: {
15807 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15808 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15809 }
15810 case X86::BI__builtin_ia32_pmovdb512_mask:
15811 case X86::BI__builtin_ia32_pmovdw512_mask:
15812 case X86::BI__builtin_ia32_pmovqw512_mask: {
15813 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15814 if (C->isAllOnesValue())
15815 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15816
15817 Intrinsic::ID IID;
15818 switch (BuiltinID) {
15819 default: llvm_unreachable("Unsupported intrinsic!");
15820 case X86::BI__builtin_ia32_pmovdb512_mask:
15821 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15822 break;
15823 case X86::BI__builtin_ia32_pmovdw512_mask:
15824 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15825 break;
15826 case X86::BI__builtin_ia32_pmovqw512_mask:
15827 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15828 break;
15829 }
15830
15831 Function *Intr = CGM.getIntrinsic(IID);
15832 return Builder.CreateCall(Intr, Ops);
15833 }
15834 case X86::BI__builtin_ia32_pblendw128:
15835 case X86::BI__builtin_ia32_blendpd:
15836 case X86::BI__builtin_ia32_blendps:
15837 case X86::BI__builtin_ia32_blendpd256:
15838 case X86::BI__builtin_ia32_blendps256:
15839 case X86::BI__builtin_ia32_pblendw256:
15840 case X86::BI__builtin_ia32_pblendd128:
15841 case X86::BI__builtin_ia32_pblendd256: {
15842 unsigned NumElts =
15843 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15844 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15845
15846 int Indices[16];
15847 // If there are more than 8 elements, the immediate is used twice so make
15848 // sure we handle that.
15849 for (unsigned i = 0; i != NumElts; ++i)
15850 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15851
15852 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15853 ArrayRef(Indices, NumElts), "blend");
15854 }
15855 case X86::BI__builtin_ia32_pshuflw:
15856 case X86::BI__builtin_ia32_pshuflw256:
15857 case X86::BI__builtin_ia32_pshuflw512: {
15858 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15859 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15860 unsigned NumElts = Ty->getNumElements();
15861
15862 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15863 Imm = (Imm & 0xff) * 0x01010101;
15864
15865 int Indices[32];
15866 for (unsigned l = 0; l != NumElts; l += 8) {
15867 for (unsigned i = 0; i != 4; ++i) {
15868 Indices[l + i] = l + (Imm & 3);
15869 Imm >>= 2;
15870 }
15871 for (unsigned i = 4; i != 8; ++i)
15872 Indices[l + i] = l + i;
15873 }
15874
15875 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15876 "pshuflw");
15877 }
15878 case X86::BI__builtin_ia32_pshufhw:
15879 case X86::BI__builtin_ia32_pshufhw256:
15880 case X86::BI__builtin_ia32_pshufhw512: {
15881 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15882 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15883 unsigned NumElts = Ty->getNumElements();
15884
15885 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15886 Imm = (Imm & 0xff) * 0x01010101;
15887
15888 int Indices[32];
15889 for (unsigned l = 0; l != NumElts; l += 8) {
15890 for (unsigned i = 0; i != 4; ++i)
15891 Indices[l + i] = l + i;
15892 for (unsigned i = 4; i != 8; ++i) {
15893 Indices[l + i] = l + 4 + (Imm & 3);
15894 Imm >>= 2;
15895 }
15896 }
15897
15898 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15899 "pshufhw");
15900 }
15901 case X86::BI__builtin_ia32_pshufd:
15902 case X86::BI__builtin_ia32_pshufd256:
15903 case X86::BI__builtin_ia32_pshufd512:
15904 case X86::BI__builtin_ia32_vpermilpd:
15905 case X86::BI__builtin_ia32_vpermilps:
15906 case X86::BI__builtin_ia32_vpermilpd256:
15907 case X86::BI__builtin_ia32_vpermilps256:
15908 case X86::BI__builtin_ia32_vpermilpd512:
15909 case X86::BI__builtin_ia32_vpermilps512: {
15910 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15911 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15912 unsigned NumElts = Ty->getNumElements();
15913 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15914 unsigned NumLaneElts = NumElts / NumLanes;
15915
15916 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15917 Imm = (Imm & 0xff) * 0x01010101;
15918
15919 int Indices[16];
15920 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15921 for (unsigned i = 0; i != NumLaneElts; ++i) {
15922 Indices[i + l] = (Imm % NumLaneElts) + l;
15923 Imm /= NumLaneElts;
15924 }
15925 }
15926
15927 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15928 "permil");
15929 }
15930 case X86::BI__builtin_ia32_shufpd:
15931 case X86::BI__builtin_ia32_shufpd256:
15932 case X86::BI__builtin_ia32_shufpd512:
15933 case X86::BI__builtin_ia32_shufps:
15934 case X86::BI__builtin_ia32_shufps256:
15935 case X86::BI__builtin_ia32_shufps512: {
15936 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15937 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15938 unsigned NumElts = Ty->getNumElements();
15939 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15940 unsigned NumLaneElts = NumElts / NumLanes;
15941
15942 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15943 Imm = (Imm & 0xff) * 0x01010101;
15944
15945 int Indices[16];
15946 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15947 for (unsigned i = 0; i != NumLaneElts; ++i) {
15948 unsigned Index = Imm % NumLaneElts;
15949 Imm /= NumLaneElts;
15950 if (i >= (NumLaneElts / 2))
15951 Index += NumElts;
15952 Indices[l + i] = l + Index;
15953 }
15954 }
15955
15956 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15957 ArrayRef(Indices, NumElts), "shufp");
15958 }
15959 case X86::BI__builtin_ia32_permdi256:
15960 case X86::BI__builtin_ia32_permdf256:
15961 case X86::BI__builtin_ia32_permdi512:
15962 case X86::BI__builtin_ia32_permdf512: {
15963 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15964 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15965 unsigned NumElts = Ty->getNumElements();
15966
15967 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15968 int Indices[8];
15969 for (unsigned l = 0; l != NumElts; l += 4)
15970 for (unsigned i = 0; i != 4; ++i)
15971 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15972
15973 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15974 "perm");
15975 }
15976 case X86::BI__builtin_ia32_palignr128:
15977 case X86::BI__builtin_ia32_palignr256:
15978 case X86::BI__builtin_ia32_palignr512: {
15979 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15980
15981 unsigned NumElts =
15982 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15983 assert(NumElts % 16 == 0);
15984
15985 // If palignr is shifting the pair of vectors more than the size of two
15986 // lanes, emit zero.
15987 if (ShiftVal >= 32)
15988 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15989
15990 // If palignr is shifting the pair of input vectors more than one lane,
15991 // but less than two lanes, convert to shifting in zeroes.
15992 if (ShiftVal > 16) {
15993 ShiftVal -= 16;
15994 Ops[1] = Ops[0];
15995 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15996 }
15997
15998 int Indices[64];
15999 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16000 for (unsigned l = 0; l != NumElts; l += 16) {
16001 for (unsigned i = 0; i != 16; ++i) {
16002 unsigned Idx = ShiftVal + i;
16003 if (Idx >= 16)
16004 Idx += NumElts - 16; // End of lane, switch operand.
16005 Indices[l + i] = Idx + l;
16006 }
16007 }
16008
16009 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16010 ArrayRef(Indices, NumElts), "palignr");
16011 }
16012 case X86::BI__builtin_ia32_alignd128:
16013 case X86::BI__builtin_ia32_alignd256:
16014 case X86::BI__builtin_ia32_alignd512:
16015 case X86::BI__builtin_ia32_alignq128:
16016 case X86::BI__builtin_ia32_alignq256:
16017 case X86::BI__builtin_ia32_alignq512: {
16018 unsigned NumElts =
16019 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16020 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16021
16022 // Mask the shift amount to width of a vector.
16023 ShiftVal &= NumElts - 1;
16024
16025 int Indices[16];
16026 for (unsigned i = 0; i != NumElts; ++i)
16027 Indices[i] = i + ShiftVal;
16028
16029 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16030 ArrayRef(Indices, NumElts), "valign");
16031 }
16032 case X86::BI__builtin_ia32_shuf_f32x4_256:
16033 case X86::BI__builtin_ia32_shuf_f64x2_256:
16034 case X86::BI__builtin_ia32_shuf_i32x4_256:
16035 case X86::BI__builtin_ia32_shuf_i64x2_256:
16036 case X86::BI__builtin_ia32_shuf_f32x4:
16037 case X86::BI__builtin_ia32_shuf_f64x2:
16038 case X86::BI__builtin_ia32_shuf_i32x4:
16039 case X86::BI__builtin_ia32_shuf_i64x2: {
16040 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16041 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16042 unsigned NumElts = Ty->getNumElements();
16043 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16044 unsigned NumLaneElts = NumElts / NumLanes;
16045
16046 int Indices[16];
16047 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16048 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16049 Imm /= NumLanes; // Discard the bits we just used.
16050 if (l >= (NumElts / 2))
16051 Index += NumElts; // Switch to other source.
16052 for (unsigned i = 0; i != NumLaneElts; ++i) {
16053 Indices[l + i] = Index + i;
16054 }
16055 }
16056
16057 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16058 ArrayRef(Indices, NumElts), "shuf");
16059 }
16060
16061 case X86::BI__builtin_ia32_vperm2f128_pd256:
16062 case X86::BI__builtin_ia32_vperm2f128_ps256:
16063 case X86::BI__builtin_ia32_vperm2f128_si256:
16064 case X86::BI__builtin_ia32_permti256: {
16065 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16066 unsigned NumElts =
16067 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16068
16069 // This takes a very simple approach since there are two lanes and a
16070 // shuffle can have 2 inputs. So we reserve the first input for the first
16071 // lane and the second input for the second lane. This may result in
16072 // duplicate sources, but this can be dealt with in the backend.
16073
16074 Value *OutOps[2];
16075 int Indices[8];
16076 for (unsigned l = 0; l != 2; ++l) {
16077 // Determine the source for this lane.
16078 if (Imm & (1 << ((l * 4) + 3)))
16079 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16080 else if (Imm & (1 << ((l * 4) + 1)))
16081 OutOps[l] = Ops[1];
16082 else
16083 OutOps[l] = Ops[0];
16084
16085 for (unsigned i = 0; i != NumElts/2; ++i) {
16086 // Start with ith element of the source for this lane.
16087 unsigned Idx = (l * NumElts) + i;
16088 // If bit 0 of the immediate half is set, switch to the high half of
16089 // the source.
16090 if (Imm & (1 << (l * 4)))
16091 Idx += NumElts/2;
16092 Indices[(l * (NumElts/2)) + i] = Idx;
16093 }
16094 }
16095
16096 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16097 ArrayRef(Indices, NumElts), "vperm");
16098 }
16099
16100 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16101 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16102 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16103 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16104 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16105 // Builtin type is vXi64 so multiply by 8 to get bytes.
16106 unsigned NumElts = ResultType->getNumElements() * 8;
16107
16108 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16109 if (ShiftVal >= 16)
16110 return llvm::Constant::getNullValue(ResultType);
16111
16112 int Indices[64];
16113 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16114 for (unsigned l = 0; l != NumElts; l += 16) {
16115 for (unsigned i = 0; i != 16; ++i) {
16116 unsigned Idx = NumElts + i - ShiftVal;
16117 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16118 Indices[l + i] = Idx + l;
16119 }
16120 }
16121
16122 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16123 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16124 Value *Zero = llvm::Constant::getNullValue(VecTy);
16125 Value *SV = Builder.CreateShuffleVector(
16126 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16127 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16128 }
16129 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16130 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16131 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16132 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16133 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16134 // Builtin type is vXi64 so multiply by 8 to get bytes.
16135 unsigned NumElts = ResultType->getNumElements() * 8;
16136
16137 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16138 if (ShiftVal >= 16)
16139 return llvm::Constant::getNullValue(ResultType);
16140
16141 int Indices[64];
16142 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16143 for (unsigned l = 0; l != NumElts; l += 16) {
16144 for (unsigned i = 0; i != 16; ++i) {
16145 unsigned Idx = i + ShiftVal;
16146 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16147 Indices[l + i] = Idx + l;
16148 }
16149 }
16150
16151 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16152 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16153 Value *Zero = llvm::Constant::getNullValue(VecTy);
16154 Value *SV = Builder.CreateShuffleVector(
16155 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16156 return Builder.CreateBitCast(SV, ResultType, "cast");
16157 }
16158 case X86::BI__builtin_ia32_kshiftliqi:
16159 case X86::BI__builtin_ia32_kshiftlihi:
16160 case X86::BI__builtin_ia32_kshiftlisi:
16161 case X86::BI__builtin_ia32_kshiftlidi: {
16162 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16163 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16164
16165 if (ShiftVal >= NumElts)
16166 return llvm::Constant::getNullValue(Ops[0]->getType());
16167
16168 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16169
16170 int Indices[64];
16171 for (unsigned i = 0; i != NumElts; ++i)
16172 Indices[i] = NumElts + i - ShiftVal;
16173
16174 Value *Zero = llvm::Constant::getNullValue(In->getType());
16175 Value *SV = Builder.CreateShuffleVector(
16176 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16177 return Builder.CreateBitCast(SV, Ops[0]->getType());
16178 }
16179 case X86::BI__builtin_ia32_kshiftriqi:
16180 case X86::BI__builtin_ia32_kshiftrihi:
16181 case X86::BI__builtin_ia32_kshiftrisi:
16182 case X86::BI__builtin_ia32_kshiftridi: {
16183 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16184 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16185
16186 if (ShiftVal >= NumElts)
16187 return llvm::Constant::getNullValue(Ops[0]->getType());
16188
16189 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16190
16191 int Indices[64];
16192 for (unsigned i = 0; i != NumElts; ++i)
16193 Indices[i] = i + ShiftVal;
16194
16195 Value *Zero = llvm::Constant::getNullValue(In->getType());
16196 Value *SV = Builder.CreateShuffleVector(
16197 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16198 return Builder.CreateBitCast(SV, Ops[0]->getType());
16199 }
16200 case X86::BI__builtin_ia32_movnti:
16201 case X86::BI__builtin_ia32_movnti64:
16202 case X86::BI__builtin_ia32_movntsd:
16203 case X86::BI__builtin_ia32_movntss: {
16204 llvm::MDNode *Node = llvm::MDNode::get(
16205 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16206
16207 Value *Ptr = Ops[0];
16208 Value *Src = Ops[1];
16209
16210 // Extract the 0'th element of the source vector.
16211 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16212 BuiltinID == X86::BI__builtin_ia32_movntss)
16213 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16214
16215 // Unaligned nontemporal store of the scalar value.
16216 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16217 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16218 SI->setAlignment(llvm::Align(1));
16219 return SI;
16220 }
16221 // Rotate is a special case of funnel shift - 1st 2 args are the same.
16222 case X86::BI__builtin_ia32_vprotb:
16223 case X86::BI__builtin_ia32_vprotw:
16224 case X86::BI__builtin_ia32_vprotd:
16225 case X86::BI__builtin_ia32_vprotq:
16226 case X86::BI__builtin_ia32_vprotbi:
16227 case X86::BI__builtin_ia32_vprotwi:
16228 case X86::BI__builtin_ia32_vprotdi:
16229 case X86::BI__builtin_ia32_vprotqi:
16230 case X86::BI__builtin_ia32_prold128:
16231 case X86::BI__builtin_ia32_prold256:
16232 case X86::BI__builtin_ia32_prold512:
16233 case X86::BI__builtin_ia32_prolq128:
16234 case X86::BI__builtin_ia32_prolq256:
16235 case X86::BI__builtin_ia32_prolq512:
16236 case X86::BI__builtin_ia32_prolvd128:
16237 case X86::BI__builtin_ia32_prolvd256:
16238 case X86::BI__builtin_ia32_prolvd512:
16239 case X86::BI__builtin_ia32_prolvq128:
16240 case X86::BI__builtin_ia32_prolvq256:
16241 case X86::BI__builtin_ia32_prolvq512:
16242 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16243 case X86::BI__builtin_ia32_prord128:
16244 case X86::BI__builtin_ia32_prord256:
16245 case X86::BI__builtin_ia32_prord512:
16246 case X86::BI__builtin_ia32_prorq128:
16247 case X86::BI__builtin_ia32_prorq256:
16248 case X86::BI__builtin_ia32_prorq512:
16249 case X86::BI__builtin_ia32_prorvd128:
16250 case X86::BI__builtin_ia32_prorvd256:
16251 case X86::BI__builtin_ia32_prorvd512:
16252 case X86::BI__builtin_ia32_prorvq128:
16253 case X86::BI__builtin_ia32_prorvq256:
16254 case X86::BI__builtin_ia32_prorvq512:
16255 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16256 case X86::BI__builtin_ia32_selectb_128:
16257 case X86::BI__builtin_ia32_selectb_256:
16258 case X86::BI__builtin_ia32_selectb_512:
16259 case X86::BI__builtin_ia32_selectw_128:
16260 case X86::BI__builtin_ia32_selectw_256:
16261 case X86::BI__builtin_ia32_selectw_512:
16262 case X86::BI__builtin_ia32_selectd_128:
16263 case X86::BI__builtin_ia32_selectd_256:
16264 case X86::BI__builtin_ia32_selectd_512:
16265 case X86::BI__builtin_ia32_selectq_128:
16266 case X86::BI__builtin_ia32_selectq_256:
16267 case X86::BI__builtin_ia32_selectq_512:
16268 case X86::BI__builtin_ia32_selectph_128:
16269 case X86::BI__builtin_ia32_selectph_256:
16270 case X86::BI__builtin_ia32_selectph_512:
16271 case X86::BI__builtin_ia32_selectpbf_128:
16272 case X86::BI__builtin_ia32_selectpbf_256:
16273 case X86::BI__builtin_ia32_selectpbf_512:
16274 case X86::BI__builtin_ia32_selectps_128:
16275 case X86::BI__builtin_ia32_selectps_256:
16276 case X86::BI__builtin_ia32_selectps_512:
16277 case X86::BI__builtin_ia32_selectpd_128:
16278 case X86::BI__builtin_ia32_selectpd_256:
16279 case X86::BI__builtin_ia32_selectpd_512:
16280 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16281 case X86::BI__builtin_ia32_selectsh_128:
16282 case X86::BI__builtin_ia32_selectsbf_128:
16283 case X86::BI__builtin_ia32_selectss_128:
16284 case X86::BI__builtin_ia32_selectsd_128: {
16285 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16286 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16287 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16288 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16289 }
16290 case X86::BI__builtin_ia32_cmpb128_mask:
16291 case X86::BI__builtin_ia32_cmpb256_mask:
16292 case X86::BI__builtin_ia32_cmpb512_mask:
16293 case X86::BI__builtin_ia32_cmpw128_mask:
16294 case X86::BI__builtin_ia32_cmpw256_mask:
16295 case X86::BI__builtin_ia32_cmpw512_mask:
16296 case X86::BI__builtin_ia32_cmpd128_mask:
16297 case X86::BI__builtin_ia32_cmpd256_mask:
16298 case X86::BI__builtin_ia32_cmpd512_mask:
16299 case X86::BI__builtin_ia32_cmpq128_mask:
16300 case X86::BI__builtin_ia32_cmpq256_mask:
16301 case X86::BI__builtin_ia32_cmpq512_mask: {
16302 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16303 return EmitX86MaskedCompare(*this, CC, true, Ops);
16304 }
16305 case X86::BI__builtin_ia32_ucmpb128_mask:
16306 case X86::BI__builtin_ia32_ucmpb256_mask:
16307 case X86::BI__builtin_ia32_ucmpb512_mask:
16308 case X86::BI__builtin_ia32_ucmpw128_mask:
16309 case X86::BI__builtin_ia32_ucmpw256_mask:
16310 case X86::BI__builtin_ia32_ucmpw512_mask:
16311 case X86::BI__builtin_ia32_ucmpd128_mask:
16312 case X86::BI__builtin_ia32_ucmpd256_mask:
16313 case X86::BI__builtin_ia32_ucmpd512_mask:
16314 case X86::BI__builtin_ia32_ucmpq128_mask:
16315 case X86::BI__builtin_ia32_ucmpq256_mask:
16316 case X86::BI__builtin_ia32_ucmpq512_mask: {
16317 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16318 return EmitX86MaskedCompare(*this, CC, false, Ops);
16319 }
16320 case X86::BI__builtin_ia32_vpcomb:
16321 case X86::BI__builtin_ia32_vpcomw:
16322 case X86::BI__builtin_ia32_vpcomd:
16323 case X86::BI__builtin_ia32_vpcomq:
16324 return EmitX86vpcom(*this, Ops, true);
16325 case X86::BI__builtin_ia32_vpcomub:
16326 case X86::BI__builtin_ia32_vpcomuw:
16327 case X86::BI__builtin_ia32_vpcomud:
16328 case X86::BI__builtin_ia32_vpcomuq:
16329 return EmitX86vpcom(*this, Ops, false);
16330
16331 case X86::BI__builtin_ia32_kortestcqi:
16332 case X86::BI__builtin_ia32_kortestchi:
16333 case X86::BI__builtin_ia32_kortestcsi:
16334 case X86::BI__builtin_ia32_kortestcdi: {
16335 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16336 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16337 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16338 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16339 }
16340 case X86::BI__builtin_ia32_kortestzqi:
16341 case X86::BI__builtin_ia32_kortestzhi:
16342 case X86::BI__builtin_ia32_kortestzsi:
16343 case X86::BI__builtin_ia32_kortestzdi: {
16344 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16345 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16346 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16347 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16348 }
16349
16350 case X86::BI__builtin_ia32_ktestcqi:
16351 case X86::BI__builtin_ia32_ktestzqi:
16352 case X86::BI__builtin_ia32_ktestchi:
16353 case X86::BI__builtin_ia32_ktestzhi:
16354 case X86::BI__builtin_ia32_ktestcsi:
16355 case X86::BI__builtin_ia32_ktestzsi:
16356 case X86::BI__builtin_ia32_ktestcdi:
16357 case X86::BI__builtin_ia32_ktestzdi: {
16358 Intrinsic::ID IID;
16359 switch (BuiltinID) {
16360 default: llvm_unreachable("Unsupported intrinsic!");
16361 case X86::BI__builtin_ia32_ktestcqi:
16362 IID = Intrinsic::x86_avx512_ktestc_b;
16363 break;
16364 case X86::BI__builtin_ia32_ktestzqi:
16365 IID = Intrinsic::x86_avx512_ktestz_b;
16366 break;
16367 case X86::BI__builtin_ia32_ktestchi:
16368 IID = Intrinsic::x86_avx512_ktestc_w;
16369 break;
16370 case X86::BI__builtin_ia32_ktestzhi:
16371 IID = Intrinsic::x86_avx512_ktestz_w;
16372 break;
16373 case X86::BI__builtin_ia32_ktestcsi:
16374 IID = Intrinsic::x86_avx512_ktestc_d;
16375 break;
16376 case X86::BI__builtin_ia32_ktestzsi:
16377 IID = Intrinsic::x86_avx512_ktestz_d;
16378 break;
16379 case X86::BI__builtin_ia32_ktestcdi:
16380 IID = Intrinsic::x86_avx512_ktestc_q;
16381 break;
16382 case X86::BI__builtin_ia32_ktestzdi:
16383 IID = Intrinsic::x86_avx512_ktestz_q;
16384 break;
16385 }
16386
16387 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16388 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16389 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16390 Function *Intr = CGM.getIntrinsic(IID);
16391 return Builder.CreateCall(Intr, {LHS, RHS});
16392 }
16393
16394 case X86::BI__builtin_ia32_kaddqi:
16395 case X86::BI__builtin_ia32_kaddhi:
16396 case X86::BI__builtin_ia32_kaddsi:
16397 case X86::BI__builtin_ia32_kadddi: {
16398 Intrinsic::ID IID;
16399 switch (BuiltinID) {
16400 default: llvm_unreachable("Unsupported intrinsic!");
16401 case X86::BI__builtin_ia32_kaddqi:
16402 IID = Intrinsic::x86_avx512_kadd_b;
16403 break;
16404 case X86::BI__builtin_ia32_kaddhi:
16405 IID = Intrinsic::x86_avx512_kadd_w;
16406 break;
16407 case X86::BI__builtin_ia32_kaddsi:
16408 IID = Intrinsic::x86_avx512_kadd_d;
16409 break;
16410 case X86::BI__builtin_ia32_kadddi:
16411 IID = Intrinsic::x86_avx512_kadd_q;
16412 break;
16413 }
16414
16415 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16416 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16417 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16418 Function *Intr = CGM.getIntrinsic(IID);
16419 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16420 return Builder.CreateBitCast(Res, Ops[0]->getType());
16421 }
16422 case X86::BI__builtin_ia32_kandqi:
16423 case X86::BI__builtin_ia32_kandhi:
16424 case X86::BI__builtin_ia32_kandsi:
16425 case X86::BI__builtin_ia32_kanddi:
16426 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16427 case X86::BI__builtin_ia32_kandnqi:
16428 case X86::BI__builtin_ia32_kandnhi:
16429 case X86::BI__builtin_ia32_kandnsi:
16430 case X86::BI__builtin_ia32_kandndi:
16431 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16432 case X86::BI__builtin_ia32_korqi:
16433 case X86::BI__builtin_ia32_korhi:
16434 case X86::BI__builtin_ia32_korsi:
16435 case X86::BI__builtin_ia32_kordi:
16436 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16437 case X86::BI__builtin_ia32_kxnorqi:
16438 case X86::BI__builtin_ia32_kxnorhi:
16439 case X86::BI__builtin_ia32_kxnorsi:
16440 case X86::BI__builtin_ia32_kxnordi:
16441 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16442 case X86::BI__builtin_ia32_kxorqi:
16443 case X86::BI__builtin_ia32_kxorhi:
16444 case X86::BI__builtin_ia32_kxorsi:
16445 case X86::BI__builtin_ia32_kxordi:
16446 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16447 case X86::BI__builtin_ia32_knotqi:
16448 case X86::BI__builtin_ia32_knothi:
16449 case X86::BI__builtin_ia32_knotsi:
16450 case X86::BI__builtin_ia32_knotdi: {
16451 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16452 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16453 return Builder.CreateBitCast(Builder.CreateNot(Res),
16454 Ops[0]->getType());
16455 }
16456 case X86::BI__builtin_ia32_kmovb:
16457 case X86::BI__builtin_ia32_kmovw:
16458 case X86::BI__builtin_ia32_kmovd:
16459 case X86::BI__builtin_ia32_kmovq: {
16460 // Bitcast to vXi1 type and then back to integer. This gets the mask
16461 // register type into the IR, but might be optimized out depending on
16462 // what's around it.
16463 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16464 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16465 return Builder.CreateBitCast(Res, Ops[0]->getType());
16466 }
16467
16468 case X86::BI__builtin_ia32_kunpckdi:
16469 case X86::BI__builtin_ia32_kunpcksi:
16470 case X86::BI__builtin_ia32_kunpckhi: {
16471 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16472 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16473 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16474 int Indices[64];
16475 for (unsigned i = 0; i != NumElts; ++i)
16476 Indices[i] = i;
16477
16478 // First extract half of each vector. This gives better codegen than
16479 // doing it in a single shuffle.
16480 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16481 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16482 // Concat the vectors.
16483 // NOTE: Operands are swapped to match the intrinsic definition.
16484 Value *Res =
16485 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16486 return Builder.CreateBitCast(Res, Ops[0]->getType());
16487 }
16488
16489 case X86::BI__builtin_ia32_vplzcntd_128:
16490 case X86::BI__builtin_ia32_vplzcntd_256:
16491 case X86::BI__builtin_ia32_vplzcntd_512:
16492 case X86::BI__builtin_ia32_vplzcntq_128:
16493 case X86::BI__builtin_ia32_vplzcntq_256:
16494 case X86::BI__builtin_ia32_vplzcntq_512: {
16495 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16496 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16497 }
16498 case X86::BI__builtin_ia32_sqrtss:
16499 case X86::BI__builtin_ia32_sqrtsd: {
16500 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16501 Function *F;
16502 if (Builder.getIsFPConstrained()) {
16503 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16504 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16505 A->getType());
16506 A = Builder.CreateConstrainedFPCall(F, {A});
16507 } else {
16508 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16509 A = Builder.CreateCall(F, {A});
16510 }
16511 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16512 }
16513 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16514 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16515 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16516 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16517 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16518 // otherwise keep the intrinsic.
16519 if (CC != 4) {
16520 Intrinsic::ID IID;
16521
16522 switch (BuiltinID) {
16523 default:
16524 llvm_unreachable("Unsupported intrinsic!");
16525 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16526 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16527 break;
16528 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16529 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16530 break;
16531 case X86::BI__builtin_ia32_sqrtss_round_mask:
16532 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16533 break;
16534 }
16535 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16536 }
16537 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16538 Function *F;
16539 if (Builder.getIsFPConstrained()) {
16540 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16541 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16542 A->getType());
16543 A = Builder.CreateConstrainedFPCall(F, A);
16544 } else {
16545 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16546 A = Builder.CreateCall(F, A);
16547 }
16548 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16549 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16550 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16551 }
16552 case X86::BI__builtin_ia32_sqrtpd256:
16553 case X86::BI__builtin_ia32_sqrtpd:
16554 case X86::BI__builtin_ia32_sqrtps256:
16555 case X86::BI__builtin_ia32_sqrtps:
16556 case X86::BI__builtin_ia32_sqrtph256:
16557 case X86::BI__builtin_ia32_sqrtph:
16558 case X86::BI__builtin_ia32_sqrtph512:
16559 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16560 case X86::BI__builtin_ia32_vsqrtnepbf16:
16561 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16562 case X86::BI__builtin_ia32_sqrtps512:
16563 case X86::BI__builtin_ia32_sqrtpd512: {
16564 if (Ops.size() == 2) {
16565 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16566 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16567 // otherwise keep the intrinsic.
16568 if (CC != 4) {
16569 Intrinsic::ID IID;
16570
16571 switch (BuiltinID) {
16572 default:
16573 llvm_unreachable("Unsupported intrinsic!");
16574 case X86::BI__builtin_ia32_sqrtph512:
16575 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16576 break;
16577 case X86::BI__builtin_ia32_sqrtps512:
16578 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16579 break;
16580 case X86::BI__builtin_ia32_sqrtpd512:
16581 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16582 break;
16583 }
16584 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16585 }
16586 }
16587 if (Builder.getIsFPConstrained()) {
16588 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16589 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16590 Ops[0]->getType());
16591 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16592 } else {
16593 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16594 return Builder.CreateCall(F, Ops[0]);
16595 }
16596 }
16597
16598 case X86::BI__builtin_ia32_pmuludq128:
16599 case X86::BI__builtin_ia32_pmuludq256:
16600 case X86::BI__builtin_ia32_pmuludq512:
16601 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16602
16603 case X86::BI__builtin_ia32_pmuldq128:
16604 case X86::BI__builtin_ia32_pmuldq256:
16605 case X86::BI__builtin_ia32_pmuldq512:
16606 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16607
16608 case X86::BI__builtin_ia32_pternlogd512_mask:
16609 case X86::BI__builtin_ia32_pternlogq512_mask:
16610 case X86::BI__builtin_ia32_pternlogd128_mask:
16611 case X86::BI__builtin_ia32_pternlogd256_mask:
16612 case X86::BI__builtin_ia32_pternlogq128_mask:
16613 case X86::BI__builtin_ia32_pternlogq256_mask:
16614 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16615
16616 case X86::BI__builtin_ia32_pternlogd512_maskz:
16617 case X86::BI__builtin_ia32_pternlogq512_maskz:
16618 case X86::BI__builtin_ia32_pternlogd128_maskz:
16619 case X86::BI__builtin_ia32_pternlogd256_maskz:
16620 case X86::BI__builtin_ia32_pternlogq128_maskz:
16621 case X86::BI__builtin_ia32_pternlogq256_maskz:
16622 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16623
16624 case X86::BI__builtin_ia32_vpshldd128:
16625 case X86::BI__builtin_ia32_vpshldd256:
16626 case X86::BI__builtin_ia32_vpshldd512:
16627 case X86::BI__builtin_ia32_vpshldq128:
16628 case X86::BI__builtin_ia32_vpshldq256:
16629 case X86::BI__builtin_ia32_vpshldq512:
16630 case X86::BI__builtin_ia32_vpshldw128:
16631 case X86::BI__builtin_ia32_vpshldw256:
16632 case X86::BI__builtin_ia32_vpshldw512:
16633 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16634
16635 case X86::BI__builtin_ia32_vpshrdd128:
16636 case X86::BI__builtin_ia32_vpshrdd256:
16637 case X86::BI__builtin_ia32_vpshrdd512:
16638 case X86::BI__builtin_ia32_vpshrdq128:
16639 case X86::BI__builtin_ia32_vpshrdq256:
16640 case X86::BI__builtin_ia32_vpshrdq512:
16641 case X86::BI__builtin_ia32_vpshrdw128:
16642 case X86::BI__builtin_ia32_vpshrdw256:
16643 case X86::BI__builtin_ia32_vpshrdw512:
16644 // Ops 0 and 1 are swapped.
16645 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16646
16647 case X86::BI__builtin_ia32_vpshldvd128:
16648 case X86::BI__builtin_ia32_vpshldvd256:
16649 case X86::BI__builtin_ia32_vpshldvd512:
16650 case X86::BI__builtin_ia32_vpshldvq128:
16651 case X86::BI__builtin_ia32_vpshldvq256:
16652 case X86::BI__builtin_ia32_vpshldvq512:
16653 case X86::BI__builtin_ia32_vpshldvw128:
16654 case X86::BI__builtin_ia32_vpshldvw256:
16655 case X86::BI__builtin_ia32_vpshldvw512:
16656 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16657
16658 case X86::BI__builtin_ia32_vpshrdvd128:
16659 case X86::BI__builtin_ia32_vpshrdvd256:
16660 case X86::BI__builtin_ia32_vpshrdvd512:
16661 case X86::BI__builtin_ia32_vpshrdvq128:
16662 case X86::BI__builtin_ia32_vpshrdvq256:
16663 case X86::BI__builtin_ia32_vpshrdvq512:
16664 case X86::BI__builtin_ia32_vpshrdvw128:
16665 case X86::BI__builtin_ia32_vpshrdvw256:
16666 case X86::BI__builtin_ia32_vpshrdvw512:
16667 // Ops 0 and 1 are swapped.
16668 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16669
16670 // Reductions
16671 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16672 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16673 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16674 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16675 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16676 Function *F =
16677 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16678 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16679 Builder.getFastMathFlags().setAllowReassoc();
16680 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16681 }
16682 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16683 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16684 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16685 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16686 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16687 Function *F =
16688 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16689 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16690 Builder.getFastMathFlags().setAllowReassoc();
16691 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16692 }
16693 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16694 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16695 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16696 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16697 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16698 Function *F =
16699 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16700 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16701 Builder.getFastMathFlags().setNoNaNs();
16702 return Builder.CreateCall(F, {Ops[0]});
16703 }
16704 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16705 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16706 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16707 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16708 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16709 Function *F =
16710 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16711 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16712 Builder.getFastMathFlags().setNoNaNs();
16713 return Builder.CreateCall(F, {Ops[0]});
16714 }
16715
16716 case X86::BI__builtin_ia32_rdrand16_step:
16717 case X86::BI__builtin_ia32_rdrand32_step:
16718 case X86::BI__builtin_ia32_rdrand64_step:
16719 case X86::BI__builtin_ia32_rdseed16_step:
16720 case X86::BI__builtin_ia32_rdseed32_step:
16721 case X86::BI__builtin_ia32_rdseed64_step: {
16722 Intrinsic::ID ID;
16723 switch (BuiltinID) {
16724 default: llvm_unreachable("Unsupported intrinsic!");
16725 case X86::BI__builtin_ia32_rdrand16_step:
16726 ID = Intrinsic::x86_rdrand_16;
16727 break;
16728 case X86::BI__builtin_ia32_rdrand32_step:
16729 ID = Intrinsic::x86_rdrand_32;
16730 break;
16731 case X86::BI__builtin_ia32_rdrand64_step:
16732 ID = Intrinsic::x86_rdrand_64;
16733 break;
16734 case X86::BI__builtin_ia32_rdseed16_step:
16735 ID = Intrinsic::x86_rdseed_16;
16736 break;
16737 case X86::BI__builtin_ia32_rdseed32_step:
16738 ID = Intrinsic::x86_rdseed_32;
16739 break;
16740 case X86::BI__builtin_ia32_rdseed64_step:
16741 ID = Intrinsic::x86_rdseed_64;
16742 break;
16743 }
16744
16745 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16746 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16747 Ops[0]);
16748 return Builder.CreateExtractValue(Call, 1);
16749 }
16750 case X86::BI__builtin_ia32_addcarryx_u32:
16751 case X86::BI__builtin_ia32_addcarryx_u64:
16752 case X86::BI__builtin_ia32_subborrow_u32:
16753 case X86::BI__builtin_ia32_subborrow_u64: {
16754 Intrinsic::ID IID;
16755 switch (BuiltinID) {
16756 default: llvm_unreachable("Unsupported intrinsic!");
16757 case X86::BI__builtin_ia32_addcarryx_u32:
16758 IID = Intrinsic::x86_addcarry_32;
16759 break;
16760 case X86::BI__builtin_ia32_addcarryx_u64:
16761 IID = Intrinsic::x86_addcarry_64;
16762 break;
16763 case X86::BI__builtin_ia32_subborrow_u32:
16764 IID = Intrinsic::x86_subborrow_32;
16765 break;
16766 case X86::BI__builtin_ia32_subborrow_u64:
16767 IID = Intrinsic::x86_subborrow_64;
16768 break;
16769 }
16770
16771 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16772 { Ops[0], Ops[1], Ops[2] });
16773 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16774 Ops[3]);
16775 return Builder.CreateExtractValue(Call, 0);
16776 }
16777
16778 case X86::BI__builtin_ia32_fpclassps128_mask:
16779 case X86::BI__builtin_ia32_fpclassps256_mask:
16780 case X86::BI__builtin_ia32_fpclassps512_mask:
16781 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16782 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16783 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16784 case X86::BI__builtin_ia32_fpclassph128_mask:
16785 case X86::BI__builtin_ia32_fpclassph256_mask:
16786 case X86::BI__builtin_ia32_fpclassph512_mask:
16787 case X86::BI__builtin_ia32_fpclasspd128_mask:
16788 case X86::BI__builtin_ia32_fpclasspd256_mask:
16789 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16790 unsigned NumElts =
16791 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16792 Value *MaskIn = Ops[2];
16793 Ops.erase(&Ops[2]);
16794
16795 Intrinsic::ID ID;
16796 switch (BuiltinID) {
16797 default: llvm_unreachable("Unsupported intrinsic!");
16798 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16799 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16800 break;
16801 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16802 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16803 break;
16804 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16805 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16806 break;
16807 case X86::BI__builtin_ia32_fpclassph128_mask:
16808 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16809 break;
16810 case X86::BI__builtin_ia32_fpclassph256_mask:
16811 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16812 break;
16813 case X86::BI__builtin_ia32_fpclassph512_mask:
16814 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16815 break;
16816 case X86::BI__builtin_ia32_fpclassps128_mask:
16817 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16818 break;
16819 case X86::BI__builtin_ia32_fpclassps256_mask:
16820 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16821 break;
16822 case X86::BI__builtin_ia32_fpclassps512_mask:
16823 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16824 break;
16825 case X86::BI__builtin_ia32_fpclasspd128_mask:
16826 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16827 break;
16828 case X86::BI__builtin_ia32_fpclasspd256_mask:
16829 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16830 break;
16831 case X86::BI__builtin_ia32_fpclasspd512_mask:
16832 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16833 break;
16834 }
16835
16836 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16837 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16838 }
16839
16840 case X86::BI__builtin_ia32_vp2intersect_q_512:
16841 case X86::BI__builtin_ia32_vp2intersect_q_256:
16842 case X86::BI__builtin_ia32_vp2intersect_q_128:
16843 case X86::BI__builtin_ia32_vp2intersect_d_512:
16844 case X86::BI__builtin_ia32_vp2intersect_d_256:
16845 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16846 unsigned NumElts =
16847 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16848 Intrinsic::ID ID;
16849
16850 switch (BuiltinID) {
16851 default: llvm_unreachable("Unsupported intrinsic!");
16852 case X86::BI__builtin_ia32_vp2intersect_q_512:
16853 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16854 break;
16855 case X86::BI__builtin_ia32_vp2intersect_q_256:
16856 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16857 break;
16858 case X86::BI__builtin_ia32_vp2intersect_q_128:
16859 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16860 break;
16861 case X86::BI__builtin_ia32_vp2intersect_d_512:
16862 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16863 break;
16864 case X86::BI__builtin_ia32_vp2intersect_d_256:
16865 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16866 break;
16867 case X86::BI__builtin_ia32_vp2intersect_d_128:
16868 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16869 break;
16870 }
16871
16872 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16873 Value *Result = Builder.CreateExtractValue(Call, 0);
16874 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16876
16877 Result = Builder.CreateExtractValue(Call, 1);
16878 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16880 }
16881
16882 case X86::BI__builtin_ia32_vpmultishiftqb128:
16883 case X86::BI__builtin_ia32_vpmultishiftqb256:
16884 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16885 Intrinsic::ID ID;
16886 switch (BuiltinID) {
16887 default: llvm_unreachable("Unsupported intrinsic!");
16888 case X86::BI__builtin_ia32_vpmultishiftqb128:
16889 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16890 break;
16891 case X86::BI__builtin_ia32_vpmultishiftqb256:
16892 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16893 break;
16894 case X86::BI__builtin_ia32_vpmultishiftqb512:
16895 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16896 break;
16897 }
16898
16899 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16900 }
16901
16902 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16903 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16904 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16905 unsigned NumElts =
16906 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16907 Value *MaskIn = Ops[2];
16908 Ops.erase(&Ops[2]);
16909
16910 Intrinsic::ID ID;
16911 switch (BuiltinID) {
16912 default: llvm_unreachable("Unsupported intrinsic!");
16913 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16914 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16915 break;
16916 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16917 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16918 break;
16919 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16920 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16921 break;
16922 }
16923
16924 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16925 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16926 }
16927
16928 // packed comparison intrinsics
16929 case X86::BI__builtin_ia32_cmpeqps:
16930 case X86::BI__builtin_ia32_cmpeqpd:
16931 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16932 case X86::BI__builtin_ia32_cmpltps:
16933 case X86::BI__builtin_ia32_cmpltpd:
16934 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16935 case X86::BI__builtin_ia32_cmpleps:
16936 case X86::BI__builtin_ia32_cmplepd:
16937 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16938 case X86::BI__builtin_ia32_cmpunordps:
16939 case X86::BI__builtin_ia32_cmpunordpd:
16940 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16941 case X86::BI__builtin_ia32_cmpneqps:
16942 case X86::BI__builtin_ia32_cmpneqpd:
16943 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16944 case X86::BI__builtin_ia32_cmpnltps:
16945 case X86::BI__builtin_ia32_cmpnltpd:
16946 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16947 case X86::BI__builtin_ia32_cmpnleps:
16948 case X86::BI__builtin_ia32_cmpnlepd:
16949 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16950 case X86::BI__builtin_ia32_cmpordps:
16951 case X86::BI__builtin_ia32_cmpordpd:
16952 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16953 case X86::BI__builtin_ia32_cmpph128_mask:
16954 case X86::BI__builtin_ia32_cmpph256_mask:
16955 case X86::BI__builtin_ia32_cmpph512_mask:
16956 case X86::BI__builtin_ia32_cmpps128_mask:
16957 case X86::BI__builtin_ia32_cmpps256_mask:
16958 case X86::BI__builtin_ia32_cmpps512_mask:
16959 case X86::BI__builtin_ia32_cmppd128_mask:
16960 case X86::BI__builtin_ia32_cmppd256_mask:
16961 case X86::BI__builtin_ia32_cmppd512_mask:
16962 case X86::BI__builtin_ia32_vcmppd256_round_mask:
16963 case X86::BI__builtin_ia32_vcmpps256_round_mask:
16964 case X86::BI__builtin_ia32_vcmpph256_round_mask:
16965 case X86::BI__builtin_ia32_vcmppbf16512_mask:
16966 case X86::BI__builtin_ia32_vcmppbf16256_mask:
16967 case X86::BI__builtin_ia32_vcmppbf16128_mask:
16968 IsMaskFCmp = true;
16969 [[fallthrough]];
16970 case X86::BI__builtin_ia32_cmpps:
16971 case X86::BI__builtin_ia32_cmpps256:
16972 case X86::BI__builtin_ia32_cmppd:
16973 case X86::BI__builtin_ia32_cmppd256: {
16974 // Lowering vector comparisons to fcmp instructions, while
16975 // ignoring signalling behaviour requested
16976 // ignoring rounding mode requested
16977 // This is only possible if fp-model is not strict and FENV_ACCESS is off.
16978
16979 // The third argument is the comparison condition, an integer in the
16980 // range [0, 31]
16981 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16982
16983 // Lowering to IR fcmp instruction.
16984 // Ignoring requested signaling behaviour,
16985 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16986 FCmpInst::Predicate Pred;
16987 bool IsSignaling;
16988 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16989 // behavior is inverted. We'll handle that after the switch.
16990 switch (CC & 0xf) {
16991 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16992 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16993 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16994 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16995 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16996 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16997 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16998 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16999 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17000 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17001 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17002 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17003 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17004 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17005 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17006 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17007 default: llvm_unreachable("Unhandled CC");
17008 }
17009
17010 // Invert the signalling behavior for 16-31.
17011 if (CC & 0x10)
17012 IsSignaling = !IsSignaling;
17013
17014 // If the predicate is true or false and we're using constrained intrinsics,
17015 // we don't have a compare intrinsic we can use. Just use the legacy X86
17016 // specific intrinsic.
17017 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17018 // use the legacy X86 specific intrinsic.
17019 if (Builder.getIsFPConstrained() &&
17020 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17021 IsMaskFCmp)) {
17022
17023 Intrinsic::ID IID;
17024 switch (BuiltinID) {
17025 default: llvm_unreachable("Unexpected builtin");
17026 case X86::BI__builtin_ia32_cmpps:
17027 IID = Intrinsic::x86_sse_cmp_ps;
17028 break;
17029 case X86::BI__builtin_ia32_cmpps256:
17030 IID = Intrinsic::x86_avx_cmp_ps_256;
17031 break;
17032 case X86::BI__builtin_ia32_cmppd:
17033 IID = Intrinsic::x86_sse2_cmp_pd;
17034 break;
17035 case X86::BI__builtin_ia32_cmppd256:
17036 IID = Intrinsic::x86_avx_cmp_pd_256;
17037 break;
17038 case X86::BI__builtin_ia32_cmpph128_mask:
17039 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17040 break;
17041 case X86::BI__builtin_ia32_cmpph256_mask:
17042 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17043 break;
17044 case X86::BI__builtin_ia32_cmpph512_mask:
17045 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17046 break;
17047 case X86::BI__builtin_ia32_cmpps512_mask:
17048 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17049 break;
17050 case X86::BI__builtin_ia32_cmppd512_mask:
17051 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17052 break;
17053 case X86::BI__builtin_ia32_cmpps128_mask:
17054 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17055 break;
17056 case X86::BI__builtin_ia32_cmpps256_mask:
17057 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17058 break;
17059 case X86::BI__builtin_ia32_cmppd128_mask:
17060 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17061 break;
17062 case X86::BI__builtin_ia32_cmppd256_mask:
17063 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17064 break;
17065 }
17066
17067 Function *Intr = CGM.getIntrinsic(IID);
17068 if (IsMaskFCmp) {
17069 unsigned NumElts =
17070 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17071 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17072 Value *Cmp = Builder.CreateCall(Intr, Ops);
17073 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17074 }
17075
17076 return Builder.CreateCall(Intr, Ops);
17077 }
17078
17079 // Builtins without the _mask suffix return a vector of integers
17080 // of the same width as the input vectors
17081 if (IsMaskFCmp) {
17082 // We ignore SAE if strict FP is disabled. We only keep precise
17083 // exception behavior under strict FP.
17084 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
17085 // object will be required.
17086 unsigned NumElts =
17087 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17088 Value *Cmp;
17089 if (IsSignaling)
17090 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17091 else
17092 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17093 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17094 }
17095
17096 return getVectorFCmpIR(Pred, IsSignaling);
17097 }
17098
17099 // SSE scalar comparison intrinsics
17100 case X86::BI__builtin_ia32_cmpeqss:
17101 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17102 case X86::BI__builtin_ia32_cmpltss:
17103 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17104 case X86::BI__builtin_ia32_cmpless:
17105 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17106 case X86::BI__builtin_ia32_cmpunordss:
17107 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17108 case X86::BI__builtin_ia32_cmpneqss:
17109 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17110 case X86::BI__builtin_ia32_cmpnltss:
17111 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17112 case X86::BI__builtin_ia32_cmpnless:
17113 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17114 case X86::BI__builtin_ia32_cmpordss:
17115 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17116 case X86::BI__builtin_ia32_cmpeqsd:
17117 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17118 case X86::BI__builtin_ia32_cmpltsd:
17119 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17120 case X86::BI__builtin_ia32_cmplesd:
17121 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17122 case X86::BI__builtin_ia32_cmpunordsd:
17123 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17124 case X86::BI__builtin_ia32_cmpneqsd:
17125 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17126 case X86::BI__builtin_ia32_cmpnltsd:
17127 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17128 case X86::BI__builtin_ia32_cmpnlesd:
17129 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17130 case X86::BI__builtin_ia32_cmpordsd:
17131 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17132
17133 // f16c half2float intrinsics
17134 case X86::BI__builtin_ia32_vcvtph2ps:
17135 case X86::BI__builtin_ia32_vcvtph2ps256:
17136 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17137 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17138 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17139 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17140 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17141 }
17142
17143 // AVX512 bf16 intrinsics
17144 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17145 Ops[2] = getMaskVecValue(
17146 *this, Ops[2],
17147 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17148 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17149 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17150 }
17151 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17152 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17153
17154 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17155 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17156 Intrinsic::ID IID;
17157 switch (BuiltinID) {
17158 default: llvm_unreachable("Unsupported intrinsic!");
17159 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17160 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17161 break;
17162 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17163 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17164 break;
17165 }
17166 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17167 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17168 }
17169
17170 case X86::BI__cpuid:
17171 case X86::BI__cpuidex: {
17172 Value *FuncId = EmitScalarExpr(E->getArg(1));
17173 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17174 ? EmitScalarExpr(E->getArg(2))
17175 : llvm::ConstantInt::get(Int32Ty, 0);
17176
17177 llvm::StructType *CpuidRetTy =
17178 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17179 llvm::FunctionType *FTy =
17180 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17181
17182 StringRef Asm, Constraints;
17183 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17184 Asm = "cpuid";
17185 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17186 } else {
17187 // x86-64 uses %rbx as the base register, so preserve it.
17188 Asm = "xchgq %rbx, ${1:q}\n"
17189 "cpuid\n"
17190 "xchgq %rbx, ${1:q}";
17191 Constraints = "={ax},=r,={cx},={dx},0,2";
17192 }
17193
17194 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17195 /*hasSideEffects=*/false);
17196 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17197 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17198 Value *Store = nullptr;
17199 for (unsigned i = 0; i < 4; i++) {
17200 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17201 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17202 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17203 }
17204
17205 // Return the last store instruction to signal that we have emitted the
17206 // intrinsic.
17207 return Store;
17208 }
17209
17210 case X86::BI__emul:
17211 case X86::BI__emulu: {
17212 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17213 bool isSigned = (BuiltinID == X86::BI__emul);
17214 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17215 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17216 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17217 }
17218 case X86::BI__mulh:
17219 case X86::BI__umulh:
17220 case X86::BI_mul128:
17221 case X86::BI_umul128: {
17222 llvm::Type *ResType = ConvertType(E->getType());
17223 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17224
17225 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17226 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17227 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17228
17229 Value *MulResult, *HigherBits;
17230 if (IsSigned) {
17231 MulResult = Builder.CreateNSWMul(LHS, RHS);
17232 HigherBits = Builder.CreateAShr(MulResult, 64);
17233 } else {
17234 MulResult = Builder.CreateNUWMul(LHS, RHS);
17235 HigherBits = Builder.CreateLShr(MulResult, 64);
17236 }
17237 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17238
17239 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17240 return HigherBits;
17241
17242 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17243 Builder.CreateStore(HigherBits, HighBitsAddress);
17244 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17245 }
17246
17247 case X86::BI__faststorefence: {
17248 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17249 llvm::SyncScope::System);
17250 }
17251 case X86::BI__shiftleft128:
17252 case X86::BI__shiftright128: {
17253 llvm::Function *F = CGM.getIntrinsic(
17254 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17255 Int64Ty);
17256 // Flip low/high ops and zero-extend amount to matching type.
17257 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17258 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17259 std::swap(Ops[0], Ops[1]);
17260 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17261 return Builder.CreateCall(F, Ops);
17262 }
17263 case X86::BI_ReadWriteBarrier:
17264 case X86::BI_ReadBarrier:
17265 case X86::BI_WriteBarrier: {
17266 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17267 llvm::SyncScope::SingleThread);
17268 }
17269
17270 case X86::BI_AddressOfReturnAddress: {
17271 Function *F =
17272 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17273 return Builder.CreateCall(F);
17274 }
17275 case X86::BI__stosb: {
17276 // We treat __stosb as a volatile memset - it may not generate "rep stosb"
17277 // instruction, but it will create a memset that won't be optimized away.
17278 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17279 }
17280 // Corresponds to intrinsics which return 2 tiles (tile0_tile1).
17281 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17282 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17283 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17284 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17285 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17286 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17287 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17288 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17289 Intrinsic::ID IID;
17290 switch (BuiltinID) {
17291 default:
17292 llvm_unreachable("Unsupported intrinsic!");
17293 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17294 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17295 break;
17296 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17297 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17298 break;
17299 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17300 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17301 break;
17302 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17303 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17304 break;
17305 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17306 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17307 break;
17308 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17309 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17310 break;
17311 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17312 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17313 break;
17314 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17315 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17316 break;
17317 }
17318
17319 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17320 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17321 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17322
17323 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17324 assert(PtrTy && "arg3 must be of pointer type");
17325 QualType PtreeTy = PtrTy->getPointeeType();
17326 llvm::Type *TyPtee = ConvertType(PtreeTy);
17327
17328 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17329 // Then store tile0 into DstPtr0
17330 Value *T0 = Builder.CreateExtractValue(Call, 0);
17331 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17332 {TyPtee}, {T0});
17333 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17334
17335 // Then store tile1 into DstPtr1
17336 Value *T1 = Builder.CreateExtractValue(Call, 1);
17337 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17338 {TyPtee}, {T1});
17339 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17340
17341 // Note: We deliberately avoid using x86_tilestored64_internal directly to
17342 // store the results, because it cannot guarantee the scope of the memory
17343 // written. That may cause shape reloads after the first amx intrinsic,
17344 // which the current amx register allocation is not able to handle.
17345
17346 return Store;
17347 }
17348 case X86::BI__ud2:
17349 // llvm.trap makes a ud2a instruction on x86.
17350 return EmitTrapCall(Intrinsic::trap);
17351 case X86::BI__int2c: {
17352 // This syscall signals a driver assertion failure in x86 NT kernels.
17353 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17354 llvm::InlineAsm *IA =
17355 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17356 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17357 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17358 llvm::Attribute::NoReturn);
17359 llvm::CallInst *CI = Builder.CreateCall(IA);
17360 CI->setAttributes(NoReturnAttr);
17361 return CI;
17362 }
17363 case X86::BI__readfsbyte:
17364 case X86::BI__readfsword:
17365 case X86::BI__readfsdword:
17366 case X86::BI__readfsqword: {
17367 llvm::Type *IntTy = ConvertType(E->getType());
17368 Value *Ptr = Builder.CreateIntToPtr(
17369 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17370 LoadInst *Load = Builder.CreateAlignedLoad(
17371 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17372 Load->setVolatile(true);
17373 return Load;
17374 }
17375 case X86::BI__readgsbyte:
17376 case X86::BI__readgsword:
17377 case X86::BI__readgsdword:
17378 case X86::BI__readgsqword: {
17379 llvm::Type *IntTy = ConvertType(E->getType());
17380 Value *Ptr = Builder.CreateIntToPtr(
17381 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17382 LoadInst *Load = Builder.CreateAlignedLoad(
17383 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17384 Load->setVolatile(true);
17385 return Load;
17386 }
17387 case X86::BI__builtin_ia32_encodekey128_u32: {
17388 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17389
17390 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17391
17392 for (int i = 0; i < 3; ++i) {
17393 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17394 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17395 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17396 }
17397
17398 return Builder.CreateExtractValue(Call, 0);
17399 }
17400 case X86::BI__builtin_ia32_encodekey256_u32: {
17401 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17402
17403 Value *Call =
17404 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17405
17406 for (int i = 0; i < 4; ++i) {
17407 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17408 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17409 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17410 }
17411
17412 return Builder.CreateExtractValue(Call, 0);
17413 }
17414 case X86::BI__builtin_ia32_aesenc128kl_u8:
17415 case X86::BI__builtin_ia32_aesdec128kl_u8:
17416 case X86::BI__builtin_ia32_aesenc256kl_u8:
17417 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17418 Intrinsic::ID IID;
17419 StringRef BlockName;
17420 switch (BuiltinID) {
17421 default:
17422 llvm_unreachable("Unexpected builtin");
17423 case X86::BI__builtin_ia32_aesenc128kl_u8:
17424 IID = Intrinsic::x86_aesenc128kl;
17425 BlockName = "aesenc128kl";
17426 break;
17427 case X86::BI__builtin_ia32_aesdec128kl_u8:
17428 IID = Intrinsic::x86_aesdec128kl;
17429 BlockName = "aesdec128kl";
17430 break;
17431 case X86::BI__builtin_ia32_aesenc256kl_u8:
17432 IID = Intrinsic::x86_aesenc256kl;
17433 BlockName = "aesenc256kl";
17434 break;
17435 case X86::BI__builtin_ia32_aesdec256kl_u8:
17436 IID = Intrinsic::x86_aesdec256kl;
17437 BlockName = "aesdec256kl";
17438 break;
17439 }
17440
17441 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17442
17443 BasicBlock *NoError =
17444 createBasicBlock(BlockName + "_no_error", this->CurFn);
17445 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17446 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17447
17448 Value *Ret = Builder.CreateExtractValue(Call, 0);
17449 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17450 Value *Out = Builder.CreateExtractValue(Call, 1);
17451 Builder.CreateCondBr(Succ, NoError, Error);
17452
17453 Builder.SetInsertPoint(NoError);
17455 Builder.CreateBr(End);
17456
17457 Builder.SetInsertPoint(Error);
17458 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17459 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17460 Builder.CreateBr(End);
17461
17462 Builder.SetInsertPoint(End);
17463 return Builder.CreateExtractValue(Call, 0);
17464 }
17465 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17466 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17467 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17468 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17469 Intrinsic::ID IID;
17470 StringRef BlockName;
17471 switch (BuiltinID) {
17472 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17473 IID = Intrinsic::x86_aesencwide128kl;
17474 BlockName = "aesencwide128kl";
17475 break;
17476 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17477 IID = Intrinsic::x86_aesdecwide128kl;
17478 BlockName = "aesdecwide128kl";
17479 break;
17480 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17481 IID = Intrinsic::x86_aesencwide256kl;
17482 BlockName = "aesencwide256kl";
17483 break;
17484 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17485 IID = Intrinsic::x86_aesdecwide256kl;
17486 BlockName = "aesdecwide256kl";
17487 break;
17488 }
17489
17490 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17491 Value *InOps[9];
17492 InOps[0] = Ops[2];
17493 for (int i = 0; i != 8; ++i) {
17494 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17495 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17496 }
17497
17498 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17499
17500 BasicBlock *NoError =
17501 createBasicBlock(BlockName + "_no_error", this->CurFn);
17502 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17503 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17504
17505 Value *Ret = Builder.CreateExtractValue(Call, 0);
17506 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17507 Builder.CreateCondBr(Succ, NoError, Error);
17508
17509 Builder.SetInsertPoint(NoError);
17510 for (int i = 0; i != 8; ++i) {
17511 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17512 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17513 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17514 }
17515 Builder.CreateBr(End);
17516
17517 Builder.SetInsertPoint(Error);
17518 for (int i = 0; i != 8; ++i) {
17519 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17520 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17521 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17522 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17523 }
17524 Builder.CreateBr(End);
17525
17526 Builder.SetInsertPoint(End);
17527 return Builder.CreateExtractValue(Call, 0);
17528 }
17529 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17530 IsConjFMA = true;
17531 [[fallthrough]];
17532 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17533 Intrinsic::ID IID = IsConjFMA
17534 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17535 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17536 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17537 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17538 }
17539 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17540 IsConjFMA = true;
17541 LLVM_FALLTHROUGH;
17542 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17543 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17544 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17545 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17546 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17547 }
17548 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17549 IsConjFMA = true;
17550 [[fallthrough]];
17551 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17552 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17553 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17554 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17555 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17556 return EmitX86Select(*this, And, Call, Ops[0]);
17557 }
17558 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17559 IsConjFMA = true;
17560 [[fallthrough]];
17561 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17562 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17563 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17564 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17565 static constexpr int Mask[] = {0, 5, 6, 7};
17566 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17567 }
17568 case X86::BI__builtin_ia32_prefetchi:
17569 return Builder.CreateCall(
17570 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17571 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17572 llvm::ConstantInt::get(Int32Ty, 0)});
17573 }
17574}
17575
17576Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17577 const CallExpr *E) {
17578 // Do not emit the builtin arguments in the arguments of a function call,
17579 // because the evaluation order of function arguments is not specified in C++.
17580 // This is important when testing to ensure the arguments are emitted in the
17581 // same order every time. Eg:
17582 // Instead of:
17583 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17584 // EmitScalarExpr(E->getArg(1)), "swdiv");
17585 // Use:
17586 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17587 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17588 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
17589
17590 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17591
17592#include "llvm/TargetParser/PPCTargetParser.def"
17593 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17594 unsigned Mask, CmpInst::Predicate CompOp,
17595 unsigned OpValue) -> Value * {
17596 if (SupportMethod == BUILTIN_PPC_FALSE)
17597 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17598
17599 if (SupportMethod == BUILTIN_PPC_TRUE)
17600 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17601
17602 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17603
17604 llvm::Value *FieldValue = nullptr;
17605 if (SupportMethod == USE_SYS_CONF) {
17606 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17607 llvm::Constant *SysConf =
17608 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17609
17610 // Grab the appropriate field from _system_configuration.
17611 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17612 ConstantInt::get(Int32Ty, FieldIdx)};
17613
17614 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17615 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17617 } else if (SupportMethod == SYS_CALL) {
17618 llvm::FunctionType *FTy =
17619 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17620 llvm::FunctionCallee Func =
17621 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17622
17623 FieldValue =
17624 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17625 }
17626 assert(FieldValue &&
17627 "SupportMethod value is not defined in PPCTargetParser.def.");
17628
17629 if (Mask)
17630 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17631
17632 llvm::Type *ValueType = FieldValue->getType();
17633 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17634 assert(
17635 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17636 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17637
17638 return Builder.CreateICmp(
17639 CompOp, FieldValue,
17640 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17641 };
17642
17643 switch (BuiltinID) {
17644 default: return nullptr;
17645
17646 case Builtin::BI__builtin_cpu_is: {
17647 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17648 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17649 llvm::Triple Triple = getTarget().getTriple();
17650
17651 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17652 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17653
17654 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17655 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17656#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17657 AIXID) \
17658 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17659#include "llvm/TargetParser/PPCTargetParser.def"
17660 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17661 BUILTIN_PPC_UNSUPPORTED, 0}));
17662
17663 if (Triple.isOSAIX()) {
17664 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17665 "Invalid CPU name. Missed by SemaChecking?");
17666 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17667 ICmpInst::ICMP_EQ, AIXIDValue);
17668 }
17669
17670 assert(Triple.isOSLinux() &&
17671 "__builtin_cpu_is() is only supported for AIX and Linux.");
17672
17673 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17674 "Invalid CPU name. Missed by SemaChecking?");
17675
17676 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17677 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17678
17679 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17680 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17681 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17682 return Builder.CreateICmpEQ(TheCall,
17683 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17684 }
17685 case Builtin::BI__builtin_cpu_supports: {
17686 llvm::Triple Triple = getTarget().getTriple();
17687 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17688 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17689 if (Triple.isOSAIX()) {
17690 unsigned SupportMethod, FieldIdx, Mask, Value;
17691 CmpInst::Predicate CompOp;
17692 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17693 unsigned>
17694 CPUSupportType;
17695 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17696 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17697#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17698 VALUE) \
17699 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17700#include "llvm/TargetParser/PPCTargetParser.def"
17701 .Default({BUILTIN_PPC_FALSE, 0, 0,
17702 CmpInst::Predicate(), 0}));
17703 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17704 Value);
17705 }
17706
17707 assert(Triple.isOSLinux() &&
17708 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17709 unsigned FeatureWord;
17710 unsigned BitMask;
17711 std::tie(FeatureWord, BitMask) =
17712 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17713#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17714 .Case(Name, {FA_WORD, Bitmask})
17715#include "llvm/TargetParser/PPCTargetParser.def"
17716 .Default({0, 0});
17717 if (!BitMask)
17718 return Builder.getFalse();
17719 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17720 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17721 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17722 Value *Mask =
17723 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17724 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17725#undef PPC_FAWORD_HWCAP
17726#undef PPC_FAWORD_HWCAP2
17727#undef PPC_FAWORD_CPUID
17728 }
17729
17730 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17731 // call __builtin_readcyclecounter.
17732 case PPC::BI__builtin_ppc_get_timebase:
17733 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17734
17735 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17736 case PPC::BI__builtin_altivec_lvx:
17737 case PPC::BI__builtin_altivec_lvxl:
17738 case PPC::BI__builtin_altivec_lvebx:
17739 case PPC::BI__builtin_altivec_lvehx:
17740 case PPC::BI__builtin_altivec_lvewx:
17741 case PPC::BI__builtin_altivec_lvsl:
17742 case PPC::BI__builtin_altivec_lvsr:
17743 case PPC::BI__builtin_vsx_lxvd2x:
17744 case PPC::BI__builtin_vsx_lxvw4x:
17745 case PPC::BI__builtin_vsx_lxvd2x_be:
17746 case PPC::BI__builtin_vsx_lxvw4x_be:
17747 case PPC::BI__builtin_vsx_lxvl:
17748 case PPC::BI__builtin_vsx_lxvll:
17749 {
17751 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17752 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17753 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17754 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17755 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17756 Ops.pop_back();
17757 }
17758
17759 switch (BuiltinID) {
17760 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17761 case PPC::BI__builtin_altivec_lvx:
17762 ID = Intrinsic::ppc_altivec_lvx;
17763 break;
17764 case PPC::BI__builtin_altivec_lvxl:
17765 ID = Intrinsic::ppc_altivec_lvxl;
17766 break;
17767 case PPC::BI__builtin_altivec_lvebx:
17768 ID = Intrinsic::ppc_altivec_lvebx;
17769 break;
17770 case PPC::BI__builtin_altivec_lvehx:
17771 ID = Intrinsic::ppc_altivec_lvehx;
17772 break;
17773 case PPC::BI__builtin_altivec_lvewx:
17774 ID = Intrinsic::ppc_altivec_lvewx;
17775 break;
17776 case PPC::BI__builtin_altivec_lvsl:
17777 ID = Intrinsic::ppc_altivec_lvsl;
17778 break;
17779 case PPC::BI__builtin_altivec_lvsr:
17780 ID = Intrinsic::ppc_altivec_lvsr;
17781 break;
17782 case PPC::BI__builtin_vsx_lxvd2x:
17783 ID = Intrinsic::ppc_vsx_lxvd2x;
17784 break;
17785 case PPC::BI__builtin_vsx_lxvw4x:
17786 ID = Intrinsic::ppc_vsx_lxvw4x;
17787 break;
17788 case PPC::BI__builtin_vsx_lxvd2x_be:
17789 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17790 break;
17791 case PPC::BI__builtin_vsx_lxvw4x_be:
17792 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17793 break;
17794 case PPC::BI__builtin_vsx_lxvl:
17795 ID = Intrinsic::ppc_vsx_lxvl;
17796 break;
17797 case PPC::BI__builtin_vsx_lxvll:
17798 ID = Intrinsic::ppc_vsx_lxvll;
17799 break;
17800 }
17801 llvm::Function *F = CGM.getIntrinsic(ID);
17802 return Builder.CreateCall(F, Ops, "");
17803 }
17804
17805 // vec_st, vec_xst_be
17806 case PPC::BI__builtin_altivec_stvx:
17807 case PPC::BI__builtin_altivec_stvxl:
17808 case PPC::BI__builtin_altivec_stvebx:
17809 case PPC::BI__builtin_altivec_stvehx:
17810 case PPC::BI__builtin_altivec_stvewx:
17811 case PPC::BI__builtin_vsx_stxvd2x:
17812 case PPC::BI__builtin_vsx_stxvw4x:
17813 case PPC::BI__builtin_vsx_stxvd2x_be:
17814 case PPC::BI__builtin_vsx_stxvw4x_be:
17815 case PPC::BI__builtin_vsx_stxvl:
17816 case PPC::BI__builtin_vsx_stxvll:
17817 {
17819 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17820 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17821 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17822 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17823 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17824 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17825 Ops.pop_back();
17826 }
17827
17828 switch (BuiltinID) {
17829 default: llvm_unreachable("Unsupported st intrinsic!");
17830 case PPC::BI__builtin_altivec_stvx:
17831 ID = Intrinsic::ppc_altivec_stvx;
17832 break;
17833 case PPC::BI__builtin_altivec_stvxl:
17834 ID = Intrinsic::ppc_altivec_stvxl;
17835 break;
17836 case PPC::BI__builtin_altivec_stvebx:
17837 ID = Intrinsic::ppc_altivec_stvebx;
17838 break;
17839 case PPC::BI__builtin_altivec_stvehx:
17840 ID = Intrinsic::ppc_altivec_stvehx;
17841 break;
17842 case PPC::BI__builtin_altivec_stvewx:
17843 ID = Intrinsic::ppc_altivec_stvewx;
17844 break;
17845 case PPC::BI__builtin_vsx_stxvd2x:
17846 ID = Intrinsic::ppc_vsx_stxvd2x;
17847 break;
17848 case PPC::BI__builtin_vsx_stxvw4x:
17849 ID = Intrinsic::ppc_vsx_stxvw4x;
17850 break;
17851 case PPC::BI__builtin_vsx_stxvd2x_be:
17852 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17853 break;
17854 case PPC::BI__builtin_vsx_stxvw4x_be:
17855 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17856 break;
17857 case PPC::BI__builtin_vsx_stxvl:
17858 ID = Intrinsic::ppc_vsx_stxvl;
17859 break;
17860 case PPC::BI__builtin_vsx_stxvll:
17861 ID = Intrinsic::ppc_vsx_stxvll;
17862 break;
17863 }
17864 llvm::Function *F = CGM.getIntrinsic(ID);
17865 return Builder.CreateCall(F, Ops, "");
17866 }
17867 case PPC::BI__builtin_vsx_ldrmb: {
17868 // Essentially boils down to performing an unaligned VMX load sequence so
17869 // as to avoid crossing a page boundary and then shuffling the elements
17870 // into the right side of the vector register.
17871 Value *Op0 = EmitScalarExpr(E->getArg(0));
17872 Value *Op1 = EmitScalarExpr(E->getArg(1));
17873 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17874 llvm::Type *ResTy = ConvertType(E->getType());
17875 bool IsLE = getTarget().isLittleEndian();
17876
17877 // If the user wants the entire vector, just load the entire vector.
17878 if (NumBytes == 16) {
17879 Value *LD =
17881 if (!IsLE)
17882 return LD;
17883
17884 // Reverse the bytes on LE.
17885 SmallVector<int, 16> RevMask;
17886 for (int Idx = 0; Idx < 16; Idx++)
17887 RevMask.push_back(15 - Idx);
17888 return Builder.CreateShuffleVector(LD, LD, RevMask);
17889 }
17890
17891 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17892 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17893 : Intrinsic::ppc_altivec_lvsl);
17894 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17895 Value *HiMem = Builder.CreateGEP(
17896 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17897 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17898 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17899 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17900
17901 Op0 = IsLE ? HiLd : LoLd;
17902 Op1 = IsLE ? LoLd : HiLd;
17903 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17904 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17905
17906 if (IsLE) {
17907 SmallVector<int, 16> Consts;
17908 for (int Idx = 0; Idx < 16; Idx++) {
17909 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17910 : 16 - (NumBytes - Idx);
17911 Consts.push_back(Val);
17912 }
17913 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17914 Zero, Consts);
17915 }
17917 for (int Idx = 0; Idx < 16; Idx++)
17918 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17919 Value *Mask2 = ConstantVector::get(Consts);
17920 return Builder.CreateBitCast(
17921 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17922 }
17923 case PPC::BI__builtin_vsx_strmb: {
17924 Value *Op0 = EmitScalarExpr(E->getArg(0));
17925 Value *Op1 = EmitScalarExpr(E->getArg(1));
17926 Value *Op2 = EmitScalarExpr(E->getArg(2));
17927 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17928 bool IsLE = getTarget().isLittleEndian();
17929 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17930 // Storing the whole vector, simply store it on BE and reverse bytes and
17931 // store on LE.
17932 if (Width == 16) {
17933 Value *StVec = Op2;
17934 if (IsLE) {
17935 SmallVector<int, 16> RevMask;
17936 for (int Idx = 0; Idx < 16; Idx++)
17937 RevMask.push_back(15 - Idx);
17938 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17939 }
17940 return Builder.CreateStore(
17941 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17942 }
17943 auto *ConvTy = Int64Ty;
17944 unsigned NumElts = 0;
17945 switch (Width) {
17946 default:
17947 llvm_unreachable("width for stores must be a power of 2");
17948 case 8:
17949 ConvTy = Int64Ty;
17950 NumElts = 2;
17951 break;
17952 case 4:
17953 ConvTy = Int32Ty;
17954 NumElts = 4;
17955 break;
17956 case 2:
17957 ConvTy = Int16Ty;
17958 NumElts = 8;
17959 break;
17960 case 1:
17961 ConvTy = Int8Ty;
17962 NumElts = 16;
17963 break;
17964 }
17965 Value *Vec = Builder.CreateBitCast(
17966 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17967 Value *Ptr =
17968 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17969 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17970 if (IsLE && Width > 1) {
17971 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17972 Elt = Builder.CreateCall(F, Elt);
17973 }
17974 return Builder.CreateStore(
17975 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17976 };
17977 unsigned Stored = 0;
17978 unsigned RemainingBytes = NumBytes;
17979 Value *Result;
17980 if (NumBytes == 16)
17981 return StoreSubVec(16, 0, 0);
17982 if (NumBytes >= 8) {
17983 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17984 RemainingBytes -= 8;
17985 Stored += 8;
17986 }
17987 if (RemainingBytes >= 4) {
17988 Result = StoreSubVec(4, NumBytes - Stored - 4,
17989 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17990 RemainingBytes -= 4;
17991 Stored += 4;
17992 }
17993 if (RemainingBytes >= 2) {
17994 Result = StoreSubVec(2, NumBytes - Stored - 2,
17995 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17996 RemainingBytes -= 2;
17997 Stored += 2;
17998 }
17999 if (RemainingBytes)
18000 Result =
18001 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18002 return Result;
18003 }
18004 // Square root
18005 case PPC::BI__builtin_vsx_xvsqrtsp:
18006 case PPC::BI__builtin_vsx_xvsqrtdp: {
18007 llvm::Type *ResultType = ConvertType(E->getType());
18008 Value *X = EmitScalarExpr(E->getArg(0));
18009 if (Builder.getIsFPConstrained()) {
18010 llvm::Function *F = CGM.getIntrinsic(
18011 Intrinsic::experimental_constrained_sqrt, ResultType);
18012 return Builder.CreateConstrainedFPCall(F, X);
18013 } else {
18014 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18015 return Builder.CreateCall(F, X);
18016 }
18017 }
18018 // Count leading zeros
18019 case PPC::BI__builtin_altivec_vclzb:
18020 case PPC::BI__builtin_altivec_vclzh:
18021 case PPC::BI__builtin_altivec_vclzw:
18022 case PPC::BI__builtin_altivec_vclzd: {
18023 llvm::Type *ResultType = ConvertType(E->getType());
18024 Value *X = EmitScalarExpr(E->getArg(0));
18025 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18026 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18027 return Builder.CreateCall(F, {X, Undef});
18028 }
18029 case PPC::BI__builtin_altivec_vctzb:
18030 case PPC::BI__builtin_altivec_vctzh:
18031 case PPC::BI__builtin_altivec_vctzw:
18032 case PPC::BI__builtin_altivec_vctzd: {
18033 llvm::Type *ResultType = ConvertType(E->getType());
18034 Value *X = EmitScalarExpr(E->getArg(0));
18035 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18036 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18037 return Builder.CreateCall(F, {X, Undef});
18038 }
18039 case PPC::BI__builtin_altivec_vinsd:
18040 case PPC::BI__builtin_altivec_vinsw:
18041 case PPC::BI__builtin_altivec_vinsd_elt:
18042 case PPC::BI__builtin_altivec_vinsw_elt: {
18043 llvm::Type *ResultType = ConvertType(E->getType());
18044 Value *Op0 = EmitScalarExpr(E->getArg(0));
18045 Value *Op1 = EmitScalarExpr(E->getArg(1));
18046 Value *Op2 = EmitScalarExpr(E->getArg(2));
18047
18048 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18049 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18050
18051 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18052 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18053
18054 // The third argument must be a compile time constant.
18055 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18056 assert(ArgCI &&
18057 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18058
18059 // Valid value for the third argument is dependent on the input type and
18060 // builtin called.
18061 int ValidMaxValue = 0;
18062 if (IsUnaligned)
18063 ValidMaxValue = (Is32bit) ? 12 : 8;
18064 else
18065 ValidMaxValue = (Is32bit) ? 3 : 1;
18066
18067 // Get value of third argument.
18068 int64_t ConstArg = ArgCI->getSExtValue();
18069
18070 // Compose range checking error message.
18071 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18072 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18073 RangeErrMsg += " is outside of the valid range [0, ";
18074 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18075
18076 // Issue error if third argument is not within the valid range.
18077 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18078 CGM.Error(E->getExprLoc(), RangeErrMsg);
18079
18080 // Input to vec_replace_elt is an element index, convert to byte index.
18081 if (!IsUnaligned) {
18082 ConstArg *= Is32bit ? 4 : 8;
18083 // Fix the constant according to endianness.
18084 if (getTarget().isLittleEndian())
18085 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18086 }
18087
18088 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18089 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18090 // Casting input to vector int as per intrinsic definition.
18091 Op0 =
18092 Is32bit
18093 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18094 : Builder.CreateBitCast(Op0,
18095 llvm::FixedVectorType::get(Int64Ty, 2));
18096 return Builder.CreateBitCast(
18097 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18098 }
18099 case PPC::BI__builtin_altivec_vadduqm:
18100 case PPC::BI__builtin_altivec_vsubuqm: {
18101 Value *Op0 = EmitScalarExpr(E->getArg(0));
18102 Value *Op1 = EmitScalarExpr(E->getArg(1));
18103 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18104 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18105 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18106 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18107 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18108 else
18109 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18110 }
18111 case PPC::BI__builtin_altivec_vaddcuq_c:
18112 case PPC::BI__builtin_altivec_vsubcuq_c: {
18114 Value *Op0 = EmitScalarExpr(E->getArg(0));
18115 Value *Op1 = EmitScalarExpr(E->getArg(1));
18116 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18117 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18118 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18119 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18120 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18121 ? Intrinsic::ppc_altivec_vaddcuq
18122 : Intrinsic::ppc_altivec_vsubcuq;
18123 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18124 }
18125 case PPC::BI__builtin_altivec_vaddeuqm_c:
18126 case PPC::BI__builtin_altivec_vaddecuq_c:
18127 case PPC::BI__builtin_altivec_vsubeuqm_c:
18128 case PPC::BI__builtin_altivec_vsubecuq_c: {
18130 Value *Op0 = EmitScalarExpr(E->getArg(0));
18131 Value *Op1 = EmitScalarExpr(E->getArg(1));
18132 Value *Op2 = EmitScalarExpr(E->getArg(2));
18133 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18134 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18135 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18136 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18137 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18138 switch (BuiltinID) {
18139 default:
18140 llvm_unreachable("Unsupported intrinsic!");
18141 case PPC::BI__builtin_altivec_vaddeuqm_c:
18142 ID = Intrinsic::ppc_altivec_vaddeuqm;
18143 break;
18144 case PPC::BI__builtin_altivec_vaddecuq_c:
18145 ID = Intrinsic::ppc_altivec_vaddecuq;
18146 break;
18147 case PPC::BI__builtin_altivec_vsubeuqm_c:
18148 ID = Intrinsic::ppc_altivec_vsubeuqm;
18149 break;
18150 case PPC::BI__builtin_altivec_vsubecuq_c:
18151 ID = Intrinsic::ppc_altivec_vsubecuq;
18152 break;
18153 }
18154 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18155 }
18156 case PPC::BI__builtin_ppc_rldimi:
18157 case PPC::BI__builtin_ppc_rlwimi: {
18158 Value *Op0 = EmitScalarExpr(E->getArg(0));
18159 Value *Op1 = EmitScalarExpr(E->getArg(1));
18160 Value *Op2 = EmitScalarExpr(E->getArg(2));
18161 Value *Op3 = EmitScalarExpr(E->getArg(3));
18162 // rldimi is a 64-bit instruction, expand the intrinsic before isel to
18163 // leverage peephole and avoid legalization efforts.
18164 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18165 !getTarget().getTriple().isPPC64()) {
18166 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18167 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18168 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18169 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18170 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18171 }
18172 return Builder.CreateCall(
18173 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18174 ? Intrinsic::ppc_rldimi
18175 : Intrinsic::ppc_rlwimi),
18176 {Op0, Op1, Op2, Op3});
18177 }
18178 case PPC::BI__builtin_ppc_rlwnm: {
18179 Value *Op0 = EmitScalarExpr(E->getArg(0));
18180 Value *Op1 = EmitScalarExpr(E->getArg(1));
18181 Value *Op2 = EmitScalarExpr(E->getArg(2));
18182 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18183 {Op0, Op1, Op2});
18184 }
18185 case PPC::BI__builtin_ppc_poppar4:
18186 case PPC::BI__builtin_ppc_poppar8: {
18187 Value *Op0 = EmitScalarExpr(E->getArg(0));
18188 llvm::Type *ArgType = Op0->getType();
18189 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18190 Value *Tmp = Builder.CreateCall(F, Op0);
18191
18192 llvm::Type *ResultType = ConvertType(E->getType());
18193 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18194 if (Result->getType() != ResultType)
18195 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18196 "cast");
18197 return Result;
18198 }
18199 case PPC::BI__builtin_ppc_cmpb: {
18200 Value *Op0 = EmitScalarExpr(E->getArg(0));
18201 Value *Op1 = EmitScalarExpr(E->getArg(1));
18202 if (getTarget().getTriple().isPPC64()) {
18203 Function *F =
18204 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18205 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18206 }
18207 // For 32 bit, emit the code as below:
18208 // %conv = trunc i64 %a to i32
18209 // %conv1 = trunc i64 %b to i32
18210 // %shr = lshr i64 %a, 32
18211 // %conv2 = trunc i64 %shr to i32
18212 // %shr3 = lshr i64 %b, 32
18213 // %conv4 = trunc i64 %shr3 to i32
18214 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18215 // %conv5 = zext i32 %0 to i64
18216 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18217 // %conv614 = zext i32 %1 to i64
18218 // %shl = shl nuw i64 %conv614, 32
18219 // %or = or i64 %shl, %conv5
18220 // ret i64 %or
18221 Function *F =
18222 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18223 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18224 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18225 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18226 Value *ArgOneHi =
18227 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18228 Value *ArgTwoHi =
18229 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18230 Value *ResLo = Builder.CreateZExt(
18231 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18232 Value *ResHiShift = Builder.CreateZExt(
18233 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18234 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18235 return Builder.CreateOr(ResLo, ResHi);
18236 }
18237 // Copy sign
18238 case PPC::BI__builtin_vsx_xvcpsgnsp:
18239 case PPC::BI__builtin_vsx_xvcpsgndp: {
18240 llvm::Type *ResultType = ConvertType(E->getType());
18241 Value *X = EmitScalarExpr(E->getArg(0));
18242 Value *Y = EmitScalarExpr(E->getArg(1));
18243 ID = Intrinsic::copysign;
18244 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18245 return Builder.CreateCall(F, {X, Y});
18246 }
18247 // Rounding/truncation
18248 case PPC::BI__builtin_vsx_xvrspip:
18249 case PPC::BI__builtin_vsx_xvrdpip:
18250 case PPC::BI__builtin_vsx_xvrdpim:
18251 case PPC::BI__builtin_vsx_xvrspim:
18252 case PPC::BI__builtin_vsx_xvrdpi:
18253 case PPC::BI__builtin_vsx_xvrspi:
18254 case PPC::BI__builtin_vsx_xvrdpic:
18255 case PPC::BI__builtin_vsx_xvrspic:
18256 case PPC::BI__builtin_vsx_xvrdpiz:
18257 case PPC::BI__builtin_vsx_xvrspiz: {
18258 llvm::Type *ResultType = ConvertType(E->getType());
18259 Value *X = EmitScalarExpr(E->getArg(0));
18260 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18261 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18262 ID = Builder.getIsFPConstrained()
18263 ? Intrinsic::experimental_constrained_floor
18264 : Intrinsic::floor;
18265 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18266 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18267 ID = Builder.getIsFPConstrained()
18268 ? Intrinsic::experimental_constrained_round
18269 : Intrinsic::round;
18270 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18271 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18272 ID = Builder.getIsFPConstrained()
18273 ? Intrinsic::experimental_constrained_rint
18274 : Intrinsic::rint;
18275 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18276 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18277 ID = Builder.getIsFPConstrained()
18278 ? Intrinsic::experimental_constrained_ceil
18279 : Intrinsic::ceil;
18280 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18281 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18282 ID = Builder.getIsFPConstrained()
18283 ? Intrinsic::experimental_constrained_trunc
18284 : Intrinsic::trunc;
18285 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18286 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18287 : Builder.CreateCall(F, X);
18288 }
18289
18290 // Absolute value
18291 case PPC::BI__builtin_vsx_xvabsdp:
18292 case PPC::BI__builtin_vsx_xvabssp: {
18293 llvm::Type *ResultType = ConvertType(E->getType());
18294 Value *X = EmitScalarExpr(E->getArg(0));
18295 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18296 return Builder.CreateCall(F, X);
18297 }
18298
18299 // Fastmath by default
18300 case PPC::BI__builtin_ppc_recipdivf:
18301 case PPC::BI__builtin_ppc_recipdivd:
18302 case PPC::BI__builtin_ppc_rsqrtf:
18303 case PPC::BI__builtin_ppc_rsqrtd: {
18304 FastMathFlags FMF = Builder.getFastMathFlags();
18305 Builder.getFastMathFlags().setFast();
18306 llvm::Type *ResultType = ConvertType(E->getType());
18307 Value *X = EmitScalarExpr(E->getArg(0));
18308
18309 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18310 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18311 Value *Y = EmitScalarExpr(E->getArg(1));
18312 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18313 Builder.getFastMathFlags() &= (FMF);
18314 return FDiv;
18315 }
18316 auto *One = ConstantFP::get(ResultType, 1.0);
18317 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18318 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18319 Builder.getFastMathFlags() &= (FMF);
18320 return FDiv;
18321 }
18322 case PPC::BI__builtin_ppc_alignx: {
18323 Value *Op0 = EmitScalarExpr(E->getArg(0));
18324 Value *Op1 = EmitScalarExpr(E->getArg(1));
18325 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18326 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18327 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18328 llvm::Value::MaximumAlignment);
18329
18330 emitAlignmentAssumption(Op1, E->getArg(1),
18331 /*The expr loc is sufficient.*/ SourceLocation(),
18332 AlignmentCI, nullptr);
18333 return Op1;
18334 }
18335 case PPC::BI__builtin_ppc_rdlam: {
18336 Value *Op0 = EmitScalarExpr(E->getArg(0));
18337 Value *Op1 = EmitScalarExpr(E->getArg(1));
18338 Value *Op2 = EmitScalarExpr(E->getArg(2));
18339 llvm::Type *Ty = Op0->getType();
18340 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18341 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18342 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18343 return Builder.CreateAnd(Rotate, Op2);
18344 }
18345 case PPC::BI__builtin_ppc_load2r: {
18346 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18347 Value *Op0 = EmitScalarExpr(E->getArg(0));
18348 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18349 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18350 }
18351 // FMA variations
18352 case PPC::BI__builtin_ppc_fnmsub:
18353 case PPC::BI__builtin_ppc_fnmsubs:
18354 case PPC::BI__builtin_vsx_xvmaddadp:
18355 case PPC::BI__builtin_vsx_xvmaddasp:
18356 case PPC::BI__builtin_vsx_xvnmaddadp:
18357 case PPC::BI__builtin_vsx_xvnmaddasp:
18358 case PPC::BI__builtin_vsx_xvmsubadp:
18359 case PPC::BI__builtin_vsx_xvmsubasp:
18360 case PPC::BI__builtin_vsx_xvnmsubadp:
18361 case PPC::BI__builtin_vsx_xvnmsubasp: {
18362 llvm::Type *ResultType = ConvertType(E->getType());
18363 Value *X = EmitScalarExpr(E->getArg(0));
18364 Value *Y = EmitScalarExpr(E->getArg(1));
18365 Value *Z = EmitScalarExpr(E->getArg(2));
18366 llvm::Function *F;
18367 if (Builder.getIsFPConstrained())
18368 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18369 else
18370 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18371 switch (BuiltinID) {
18372 case PPC::BI__builtin_vsx_xvmaddadp:
18373 case PPC::BI__builtin_vsx_xvmaddasp:
18374 if (Builder.getIsFPConstrained())
18375 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18376 else
18377 return Builder.CreateCall(F, {X, Y, Z});
18378 case PPC::BI__builtin_vsx_xvnmaddadp:
18379 case PPC::BI__builtin_vsx_xvnmaddasp:
18380 if (Builder.getIsFPConstrained())
18381 return Builder.CreateFNeg(
18382 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18383 else
18384 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18385 case PPC::BI__builtin_vsx_xvmsubadp:
18386 case PPC::BI__builtin_vsx_xvmsubasp:
18387 if (Builder.getIsFPConstrained())
18388 return Builder.CreateConstrainedFPCall(
18389 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18390 else
18391 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18392 case PPC::BI__builtin_ppc_fnmsub:
18393 case PPC::BI__builtin_ppc_fnmsubs:
18394 case PPC::BI__builtin_vsx_xvnmsubadp:
18395 case PPC::BI__builtin_vsx_xvnmsubasp:
18396 if (Builder.getIsFPConstrained())
18397 return Builder.CreateFNeg(
18398 Builder.CreateConstrainedFPCall(
18399 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18400 "neg");
18401 else
18402 return Builder.CreateCall(
18403 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18404 }
18405 llvm_unreachable("Unknown FMA operation");
18406 return nullptr; // Suppress no-return warning
18407 }
18408
18409 case PPC::BI__builtin_vsx_insertword: {
18410 Value *Op0 = EmitScalarExpr(E->getArg(0));
18411 Value *Op1 = EmitScalarExpr(E->getArg(1));
18412 Value *Op2 = EmitScalarExpr(E->getArg(2));
18413 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18414
18415 // Third argument is a compile time constant int. It must be clamped
18416 // to the range [0, 12].
18417 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18418 assert(ArgCI &&
18419 "Third arg to xxinsertw intrinsic must be constant integer");
18420 const int64_t MaxIndex = 12;
18421 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18422
18423 // The builtin semantics don't exactly match the xxinsertw instruction's
18424 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18425 // word from the first argument, and inserts it in the second argument. The
18426 // instruction extracts the word from its second input register and inserts
18427 // it into its first input register, so swap the first and second arguments.
18428 std::swap(Op0, Op1);
18429
18430 // Need to cast the second argument from a vector of unsigned int to a
18431 // vector of long long.
18432 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18433
18434 if (getTarget().isLittleEndian()) {
18435 // Reverse the double words in the vector we will extract from.
18436 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18437 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18438
18439 // Reverse the index.
18440 Index = MaxIndex - Index;
18441 }
18442
18443 // Intrinsic expects the first arg to be a vector of int.
18444 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18445 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18446 return Builder.CreateCall(F, {Op0, Op1, Op2});
18447 }
18448
18449 case PPC::BI__builtin_vsx_extractuword: {
18450 Value *Op0 = EmitScalarExpr(E->getArg(0));
18451 Value *Op1 = EmitScalarExpr(E->getArg(1));
18452 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18453
18454 // Intrinsic expects the first argument to be a vector of doublewords.
18455 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18456
18457 // The second argument is a compile time constant int that needs to
18458 // be clamped to the range [0, 12].
18459 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18460 assert(ArgCI &&
18461 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18462 const int64_t MaxIndex = 12;
18463 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18464
18465 if (getTarget().isLittleEndian()) {
18466 // Reverse the index.
18467 Index = MaxIndex - Index;
18468 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18469
18470 // Emit the call, then reverse the double words of the results vector.
18471 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18472
18473 Value *ShuffleCall =
18474 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18475 return ShuffleCall;
18476 } else {
18477 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18478 return Builder.CreateCall(F, {Op0, Op1});
18479 }
18480 }
18481
18482 case PPC::BI__builtin_vsx_xxpermdi: {
18483 Value *Op0 = EmitScalarExpr(E->getArg(0));
18484 Value *Op1 = EmitScalarExpr(E->getArg(1));
18485 Value *Op2 = EmitScalarExpr(E->getArg(2));
18486 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18487 assert(ArgCI && "Third arg must be constant integer!");
18488
18489 unsigned Index = ArgCI->getZExtValue();
18490 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18491 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18492
18493 // Account for endianness by treating this as just a shuffle. So we use the
18494 // same indices for both LE and BE in order to produce expected results in
18495 // both cases.
18496 int ElemIdx0 = (Index & 2) >> 1;
18497 int ElemIdx1 = 2 + (Index & 1);
18498
18499 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18500 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18501 QualType BIRetType = E->getType();
18502 auto RetTy = ConvertType(BIRetType);
18503 return Builder.CreateBitCast(ShuffleCall, RetTy);
18504 }
18505
18506 case PPC::BI__builtin_vsx_xxsldwi: {
18507 Value *Op0 = EmitScalarExpr(E->getArg(0));
18508 Value *Op1 = EmitScalarExpr(E->getArg(1));
18509 Value *Op2 = EmitScalarExpr(E->getArg(2));
18510 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18511 assert(ArgCI && "Third argument must be a compile time constant");
18512 unsigned Index = ArgCI->getZExtValue() & 0x3;
18513 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18514 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18515
18516 // Create a shuffle mask
18517 int ElemIdx0;
18518 int ElemIdx1;
18519 int ElemIdx2;
18520 int ElemIdx3;
18521 if (getTarget().isLittleEndian()) {
18522 // Little endian element N comes from element 8+N-Index of the
18523 // concatenated wide vector (of course, using modulo arithmetic on
18524 // the total number of elements).
18525 ElemIdx0 = (8 - Index) % 8;
18526 ElemIdx1 = (9 - Index) % 8;
18527 ElemIdx2 = (10 - Index) % 8;
18528 ElemIdx3 = (11 - Index) % 8;
18529 } else {
18530 // Big endian ElemIdx<N> = Index + N
18531 ElemIdx0 = Index;
18532 ElemIdx1 = Index + 1;
18533 ElemIdx2 = Index + 2;
18534 ElemIdx3 = Index + 3;
18535 }
18536
18537 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18538 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18539 QualType BIRetType = E->getType();
18540 auto RetTy = ConvertType(BIRetType);
18541 return Builder.CreateBitCast(ShuffleCall, RetTy);
18542 }
18543
18544 case PPC::BI__builtin_pack_vector_int128: {
18545 Value *Op0 = EmitScalarExpr(E->getArg(0));
18546 Value *Op1 = EmitScalarExpr(E->getArg(1));
18547 bool isLittleEndian = getTarget().isLittleEndian();
18548 Value *PoisonValue =
18549 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18550 Value *Res = Builder.CreateInsertElement(
18551 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18552 Res = Builder.CreateInsertElement(Res, Op1,
18553 (uint64_t)(isLittleEndian ? 0 : 1));
18554 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18555 }
18556
18557 case PPC::BI__builtin_unpack_vector_int128: {
18558 Value *Op0 = EmitScalarExpr(E->getArg(0));
18559 Value *Op1 = EmitScalarExpr(E->getArg(1));
18560 ConstantInt *Index = cast<ConstantInt>(Op1);
18561 Value *Unpacked = Builder.CreateBitCast(
18562 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18563
18564 if (getTarget().isLittleEndian())
18565 Index =
18566 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18567
18568 return Builder.CreateExtractElement(Unpacked, Index);
18569 }
18570
18571 case PPC::BI__builtin_ppc_sthcx: {
18572 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18573 Value *Op0 = EmitScalarExpr(E->getArg(0));
18574 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18575 return Builder.CreateCall(F, {Op0, Op1});
18576 }
18577
18578 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18579 // Some of the MMA instructions accumulate their result into an existing
18580 // accumulator whereas the others generate a new accumulator. So we need to
18581 // use custom code generation to expand a builtin call with a pointer to a
18582 // load (if the corresponding instruction accumulates its result) followed by
18583 // the call to the intrinsic and a store of the result.
18584#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18585 case PPC::BI__builtin_##Name:
18586#include "clang/Basic/BuiltinsPPC.def"
18587 {
18589 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18590 if (E->getArg(i)->getType()->isArrayType())
18591 Ops.push_back(
18592 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18593 else
18594 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18595 // The first argument of these two builtins is a pointer used to store their
18596 // result. However, the llvm intrinsics return their result in multiple
18597 // return values. So, here we emit code extracting these values from the
18598 // intrinsic results and storing them using that pointer.
18599 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18600 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18601 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18602 unsigned NumVecs = 2;
18603 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18604 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18605 NumVecs = 4;
18606 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18607 }
18608 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18609 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18610 Value *Vec = Builder.CreateLoad(Addr);
18611 Value *Call = Builder.CreateCall(F, {Vec});
18612 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18613 Value *Ptr = Ops[0];
18614 for (unsigned i=0; i<NumVecs; i++) {
18615 Value *Vec = Builder.CreateExtractValue(Call, i);
18616 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18617 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18618 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18619 }
18620 return Call;
18621 }
18622 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18623 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18624 // Reverse the order of the operands for LE, so the
18625 // same builtin call can be used on both LE and BE
18626 // without the need for the programmer to swap operands.
18627 // The operands are reversed starting from the second argument,
18628 // the first operand is the pointer to the pair/accumulator
18629 // that is being built.
18630 if (getTarget().isLittleEndian())
18631 std::reverse(Ops.begin() + 1, Ops.end());
18632 }
18633 bool Accumulate;
18634 switch (BuiltinID) {
18635 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18636 case PPC::BI__builtin_##Name: \
18637 ID = Intrinsic::ppc_##Intr; \
18638 Accumulate = Acc; \
18639 break;
18640 #include "clang/Basic/BuiltinsPPC.def"
18641 }
18642 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18643 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18644 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18645 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18646 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18647 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18648 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18649 } else {
18650 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18651 }
18652 Ops.pop_back();
18653 llvm::Function *F = CGM.getIntrinsic(ID);
18654 return Builder.CreateCall(F, Ops, "");
18655 }
18656 SmallVector<Value*, 4> CallOps;
18657 if (Accumulate) {
18658 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18659 Value *Acc = Builder.CreateLoad(Addr);
18660 CallOps.push_back(Acc);
18661 }
18662 for (unsigned i=1; i<Ops.size(); i++)
18663 CallOps.push_back(Ops[i]);
18664 llvm::Function *F = CGM.getIntrinsic(ID);
18665 Value *Call = Builder.CreateCall(F, CallOps);
18666 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18667 }
18668
18669 case PPC::BI__builtin_ppc_compare_and_swap:
18670 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18671 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18672 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18673 Value *OldVal = Builder.CreateLoad(OldValAddr);
18674 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18675 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18676 Value *Op2 = EmitScalarExpr(E->getArg(2));
18677 auto Pair = EmitAtomicCompareExchange(
18678 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18679 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18680 // Unlike c11's atomic_compare_exchange, according to
18681 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18682 // > In either case, the contents of the memory location specified by addr
18683 // > are copied into the memory location specified by old_val_addr.
18684 // But it hasn't specified storing to OldValAddr is atomic or not and
18685 // which order to use. Now following XL's codegen, treat it as a normal
18686 // store.
18687 Value *LoadedVal = Pair.first.getScalarVal();
18688 Builder.CreateStore(LoadedVal, OldValAddr);
18689 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18690 }
18691 case PPC::BI__builtin_ppc_fetch_and_add:
18692 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18693 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18694 llvm::AtomicOrdering::Monotonic);
18695 }
18696 case PPC::BI__builtin_ppc_fetch_and_and:
18697 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18698 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18699 llvm::AtomicOrdering::Monotonic);
18700 }
18701
18702 case PPC::BI__builtin_ppc_fetch_and_or:
18703 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18704 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18705 llvm::AtomicOrdering::Monotonic);
18706 }
18707 case PPC::BI__builtin_ppc_fetch_and_swap:
18708 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18709 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18710 llvm::AtomicOrdering::Monotonic);
18711 }
18712 case PPC::BI__builtin_ppc_ldarx:
18713 case PPC::BI__builtin_ppc_lwarx:
18714 case PPC::BI__builtin_ppc_lharx:
18715 case PPC::BI__builtin_ppc_lbarx:
18716 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18717 case PPC::BI__builtin_ppc_mfspr: {
18718 Value *Op0 = EmitScalarExpr(E->getArg(0));
18719 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18720 ? Int32Ty
18721 : Int64Ty;
18722 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18723 return Builder.CreateCall(F, {Op0});
18724 }
18725 case PPC::BI__builtin_ppc_mtspr: {
18726 Value *Op0 = EmitScalarExpr(E->getArg(0));
18727 Value *Op1 = EmitScalarExpr(E->getArg(1));
18728 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18729 ? Int32Ty
18730 : Int64Ty;
18731 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18732 return Builder.CreateCall(F, {Op0, Op1});
18733 }
18734 case PPC::BI__builtin_ppc_popcntb: {
18735 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18736 llvm::Type *ArgType = ArgValue->getType();
18737 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18738 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18739 }
18740 case PPC::BI__builtin_ppc_mtfsf: {
18741 // The builtin takes a uint32 that needs to be cast to an
18742 // f64 to be passed to the intrinsic.
18743 Value *Op0 = EmitScalarExpr(E->getArg(0));
18744 Value *Op1 = EmitScalarExpr(E->getArg(1));
18745 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18746 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18747 return Builder.CreateCall(F, {Op0, Cast}, "");
18748 }
18749
18750 case PPC::BI__builtin_ppc_swdiv_nochk:
18751 case PPC::BI__builtin_ppc_swdivs_nochk: {
18752 Value *Op0 = EmitScalarExpr(E->getArg(0));
18753 Value *Op1 = EmitScalarExpr(E->getArg(1));
18754 FastMathFlags FMF = Builder.getFastMathFlags();
18755 Builder.getFastMathFlags().setFast();
18756 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18757 Builder.getFastMathFlags() &= (FMF);
18758 return FDiv;
18759 }
18760 case PPC::BI__builtin_ppc_fric:
18762 *this, E, Intrinsic::rint,
18763 Intrinsic::experimental_constrained_rint))
18764 .getScalarVal();
18765 case PPC::BI__builtin_ppc_frim:
18766 case PPC::BI__builtin_ppc_frims:
18768 *this, E, Intrinsic::floor,
18769 Intrinsic::experimental_constrained_floor))
18770 .getScalarVal();
18771 case PPC::BI__builtin_ppc_frin:
18772 case PPC::BI__builtin_ppc_frins:
18774 *this, E, Intrinsic::round,
18775 Intrinsic::experimental_constrained_round))
18776 .getScalarVal();
18777 case PPC::BI__builtin_ppc_frip:
18778 case PPC::BI__builtin_ppc_frips:
18780 *this, E, Intrinsic::ceil,
18781 Intrinsic::experimental_constrained_ceil))
18782 .getScalarVal();
18783 case PPC::BI__builtin_ppc_friz:
18784 case PPC::BI__builtin_ppc_frizs:
18786 *this, E, Intrinsic::trunc,
18787 Intrinsic::experimental_constrained_trunc))
18788 .getScalarVal();
18789 case PPC::BI__builtin_ppc_fsqrt:
18790 case PPC::BI__builtin_ppc_fsqrts:
18792 *this, E, Intrinsic::sqrt,
18793 Intrinsic::experimental_constrained_sqrt))
18794 .getScalarVal();
18795 case PPC::BI__builtin_ppc_test_data_class: {
18796 Value *Op0 = EmitScalarExpr(E->getArg(0));
18797 Value *Op1 = EmitScalarExpr(E->getArg(1));
18798 return Builder.CreateCall(
18799 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18800 {Op0, Op1}, "test_data_class");
18801 }
18802 case PPC::BI__builtin_ppc_maxfe: {
18803 Value *Op0 = EmitScalarExpr(E->getArg(0));
18804 Value *Op1 = EmitScalarExpr(E->getArg(1));
18805 Value *Op2 = EmitScalarExpr(E->getArg(2));
18806 Value *Op3 = EmitScalarExpr(E->getArg(3));
18807 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18808 {Op0, Op1, Op2, Op3});
18809 }
18810 case PPC::BI__builtin_ppc_maxfl: {
18811 Value *Op0 = EmitScalarExpr(E->getArg(0));
18812 Value *Op1 = EmitScalarExpr(E->getArg(1));
18813 Value *Op2 = EmitScalarExpr(E->getArg(2));
18814 Value *Op3 = EmitScalarExpr(E->getArg(3));
18815 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18816 {Op0, Op1, Op2, Op3});
18817 }
18818 case PPC::BI__builtin_ppc_maxfs: {
18819 Value *Op0 = EmitScalarExpr(E->getArg(0));
18820 Value *Op1 = EmitScalarExpr(E->getArg(1));
18821 Value *Op2 = EmitScalarExpr(E->getArg(2));
18822 Value *Op3 = EmitScalarExpr(E->getArg(3));
18823 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18824 {Op0, Op1, Op2, Op3});
18825 }
18826 case PPC::BI__builtin_ppc_minfe: {
18827 Value *Op0 = EmitScalarExpr(E->getArg(0));
18828 Value *Op1 = EmitScalarExpr(E->getArg(1));
18829 Value *Op2 = EmitScalarExpr(E->getArg(2));
18830 Value *Op3 = EmitScalarExpr(E->getArg(3));
18831 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18832 {Op0, Op1, Op2, Op3});
18833 }
18834 case PPC::BI__builtin_ppc_minfl: {
18835 Value *Op0 = EmitScalarExpr(E->getArg(0));
18836 Value *Op1 = EmitScalarExpr(E->getArg(1));
18837 Value *Op2 = EmitScalarExpr(E->getArg(2));
18838 Value *Op3 = EmitScalarExpr(E->getArg(3));
18839 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18840 {Op0, Op1, Op2, Op3});
18841 }
18842 case PPC::BI__builtin_ppc_minfs: {
18843 Value *Op0 = EmitScalarExpr(E->getArg(0));
18844 Value *Op1 = EmitScalarExpr(E->getArg(1));
18845 Value *Op2 = EmitScalarExpr(E->getArg(2));
18846 Value *Op3 = EmitScalarExpr(E->getArg(3));
18847 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18848 {Op0, Op1, Op2, Op3});
18849 }
18850 case PPC::BI__builtin_ppc_swdiv:
18851 case PPC::BI__builtin_ppc_swdivs: {
18852 Value *Op0 = EmitScalarExpr(E->getArg(0));
18853 Value *Op1 = EmitScalarExpr(E->getArg(1));
18854 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18855 }
18856 case PPC::BI__builtin_ppc_set_fpscr_rn:
18857 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18858 {EmitScalarExpr(E->getArg(0))});
18859 case PPC::BI__builtin_ppc_mffs:
18860 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18861 }
18862}
18863
18864namespace {
18865// If \p E is not null pointer, insert address space cast to match return
18866// type of \p E if necessary.
18867Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18868 const CallExpr *E = nullptr) {
18869 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18870 auto *Call = CGF.Builder.CreateCall(F);
18871 Call->addRetAttr(
18872 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18873 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18874 if (!E)
18875 return Call;
18876 QualType BuiltinRetType = E->getType();
18877 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18878 if (RetTy == Call->getType())
18879 return Call;
18880 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18881}
18882
18883Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18884 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18885 auto *Call = CGF.Builder.CreateCall(F);
18886 Call->addRetAttr(
18887 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18888 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18889 return Call;
18890}
18891
18892// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18893/// Emit code based on Code Object ABI version.
18894/// COV_4 : Emit code to use dispatch ptr
18895/// COV_5+ : Emit code to use implicitarg ptr
18896/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18897/// and use its value for COV_4 or COV_5+ approach. It is used for
18898/// compiling device libraries in an ABI-agnostic way.
18899///
18900/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18901/// clang during compilation of user code.
18902Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18903 llvm::LoadInst *LD;
18904
18905 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18906
18907 if (Cov == CodeObjectVersionKind::COV_None) {
18908 StringRef Name = "__oclc_ABI_version";
18909 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18910 if (!ABIVersionC)
18911 ABIVersionC = new llvm::GlobalVariable(
18912 CGF.CGM.getModule(), CGF.Int32Ty, false,
18913 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18914 llvm::GlobalVariable::NotThreadLocal,
18916
18917 // This load will be eliminated by the IPSCCP because it is constant
18918 // weak_odr without externally_initialized. Either changing it to weak or
18919 // adding externally_initialized will keep the load.
18920 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18921 CGF.CGM.getIntAlign());
18922
18923 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18924 ABIVersion,
18925 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18926
18927 // Indexing the implicit kernarg segment.
18928 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18929 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18930
18931 // Indexing the HSA kernel_dispatch_packet struct.
18932 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18933 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18934
18935 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18936 LD = CGF.Builder.CreateLoad(
18938 } else {
18939 Value *GEP = nullptr;
18940 if (Cov >= CodeObjectVersionKind::COV_5) {
18941 // Indexing the implicit kernarg segment.
18942 GEP = CGF.Builder.CreateConstGEP1_32(
18943 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18944 } else {
18945 // Indexing the HSA kernel_dispatch_packet struct.
18946 GEP = CGF.Builder.CreateConstGEP1_32(
18947 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18948 }
18949 LD = CGF.Builder.CreateLoad(
18951 }
18952
18953 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18954 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18955 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18956 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18957 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18958 llvm::MDNode::get(CGF.getLLVMContext(), {}));
18959 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18960 llvm::MDNode::get(CGF.getLLVMContext(), {}));
18961 return LD;
18962}
18963
18964// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18965Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18966 const unsigned XOffset = 12;
18967 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18968 // Indexing the HSA kernel_dispatch_packet struct.
18969 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18970 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18971 auto *LD = CGF.Builder.CreateLoad(
18973
18974 llvm::MDBuilder MDB(CGF.getLLVMContext());
18975
18976 // Known non-zero.
18977 LD->setMetadata(llvm::LLVMContext::MD_range,
18978 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
18979 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18980 llvm::MDNode::get(CGF.getLLVMContext(), {}));
18981 return LD;
18982}
18983} // namespace
18984
18985// For processing memory ordering and memory scope arguments of various
18986// amdgcn builtins.
18987// \p Order takes a C++11 compatible memory-ordering specifier and converts
18988// it into LLVM's memory ordering specifier using atomic C ABI, and writes
18989// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
18990// specific SyncScopeID and writes it to \p SSID.
18992 llvm::AtomicOrdering &AO,
18993 llvm::SyncScope::ID &SSID) {
18994 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18995
18996 // Map C11/C++11 memory ordering to LLVM memory ordering
18997 assert(llvm::isValidAtomicOrderingCABI(ord));
18998 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18999 case llvm::AtomicOrderingCABI::acquire:
19000 case llvm::AtomicOrderingCABI::consume:
19001 AO = llvm::AtomicOrdering::Acquire;
19002 break;
19003 case llvm::AtomicOrderingCABI::release:
19004 AO = llvm::AtomicOrdering::Release;
19005 break;
19006 case llvm::AtomicOrderingCABI::acq_rel:
19007 AO = llvm::AtomicOrdering::AcquireRelease;
19008 break;
19009 case llvm::AtomicOrderingCABI::seq_cst:
19010 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19011 break;
19012 case llvm::AtomicOrderingCABI::relaxed:
19013 AO = llvm::AtomicOrdering::Monotonic;
19014 break;
19015 }
19016
19017 // Some of the atomic builtins take the scope as a string name.
19018 StringRef scp;
19019 if (llvm::getConstantStringInfo(Scope, scp)) {
19020 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19021 return;
19022 }
19023
19024 // Older builtins had an enum argument for the memory scope.
19025 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19026 switch (scope) {
19027 case 0: // __MEMORY_SCOPE_SYSTEM
19028 SSID = llvm::SyncScope::System;
19029 break;
19030 case 1: // __MEMORY_SCOPE_DEVICE
19031 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19032 break;
19033 case 2: // __MEMORY_SCOPE_WRKGRP
19034 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19035 break;
19036 case 3: // __MEMORY_SCOPE_WVFRNT
19037 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19038 break;
19039 case 4: // __MEMORY_SCOPE_SINGLE
19040 SSID = llvm::SyncScope::SingleThread;
19041 break;
19042 default:
19043 SSID = llvm::SyncScope::System;
19044 break;
19045 }
19046}
19047
19048llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19049 unsigned Idx,
19050 const CallExpr *E) {
19051 llvm::Value *Arg = nullptr;
19052 if ((ICEArguments & (1 << Idx)) == 0) {
19053 Arg = EmitScalarExpr(E->getArg(Idx));
19054 } else {
19055 // If this is required to be a constant, constant fold it so that we
19056 // know that the generated intrinsic gets a ConstantInt.
19057 std::optional<llvm::APSInt> Result =
19058 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19059 assert(Result && "Expected argument to be a constant");
19060 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19061 }
19062 return Arg;
19063}
19064
19065// Return dot product intrinsic that corresponds to the QT scalar type
19066static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19067 if (QT->isFloatingType())
19068 return RT.getFDotIntrinsic();
19069 if (QT->isSignedIntegerType())
19070 return RT.getSDotIntrinsic();
19071 assert(QT->isUnsignedIntegerType());
19072 return RT.getUDotIntrinsic();
19073}
19074
19075static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19077 return RT.getFirstBitSHighIntrinsic();
19078 }
19079
19081 return RT.getFirstBitUHighIntrinsic();
19082}
19083
19084// Return wave active sum that corresponds to the QT scalar type
19085static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
19086 CGHLSLRuntime &RT, QualType QT) {
19087 switch (Arch) {
19088 case llvm::Triple::spirv:
19089 return llvm::Intrinsic::spv_wave_reduce_sum;
19090 case llvm::Triple::dxil: {
19091 if (QT->isUnsignedIntegerType())
19092 return llvm::Intrinsic::dx_wave_reduce_usum;
19093 return llvm::Intrinsic::dx_wave_reduce_sum;
19094 }
19095 default:
19096 llvm_unreachable("Intrinsic WaveActiveSum"
19097 " not supported by target architecture");
19098 }
19099}
19100
19102 const CallExpr *E,
19103 ReturnValueSlot ReturnValue) {
19104 if (!getLangOpts().HLSL)
19105 return nullptr;
19106
19107 switch (BuiltinID) {
19108 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19109 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19110 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19111
19112 // TODO: Map to an hlsl_device address space.
19113 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19114
19115 return Builder.CreateIntrinsic(
19116 RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
19117 ArrayRef<Value *>{HandleOp, IndexOp});
19118 }
19119 case Builtin::BI__builtin_hlsl_all: {
19120 Value *Op0 = EmitScalarExpr(E->getArg(0));
19121 return Builder.CreateIntrinsic(
19122 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19123 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19124 "hlsl.all");
19125 }
19126 case Builtin::BI__builtin_hlsl_any: {
19127 Value *Op0 = EmitScalarExpr(E->getArg(0));
19128 return Builder.CreateIntrinsic(
19129 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19130 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19131 "hlsl.any");
19132 }
19133 case Builtin::BI__builtin_hlsl_asdouble:
19134 return handleAsDoubleBuiltin(*this, E);
19135 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19136 Value *OpX = EmitScalarExpr(E->getArg(0));
19137 Value *OpMin = EmitScalarExpr(E->getArg(1));
19138 Value *OpMax = EmitScalarExpr(E->getArg(2));
19139
19140 QualType Ty = E->getArg(0)->getType();
19141 if (auto *VecTy = Ty->getAs<VectorType>())
19142 Ty = VecTy->getElementType();
19143
19144 Intrinsic::ID Intr;
19145 if (Ty->isFloatingType()) {
19146 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19147 } else if (Ty->isUnsignedIntegerType()) {
19148 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19149 } else {
19150 assert(Ty->isSignedIntegerType());
19151 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19152 }
19153 return Builder.CreateIntrinsic(
19154 /*ReturnType=*/OpX->getType(), Intr,
19155 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19156 }
19157 case Builtin::BI__builtin_hlsl_cross: {
19158 Value *Op0 = EmitScalarExpr(E->getArg(0));
19159 Value *Op1 = EmitScalarExpr(E->getArg(1));
19160 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19161 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19162 "cross operands must have a float representation");
19163 // make sure each vector has exactly 3 elements
19164 assert(
19165 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19166 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19167 "input vectors must have 3 elements each");
19168 return Builder.CreateIntrinsic(
19169 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19170 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19171 }
19172 case Builtin::BI__builtin_hlsl_dot: {
19173 Value *Op0 = EmitScalarExpr(E->getArg(0));
19174 Value *Op1 = EmitScalarExpr(E->getArg(1));
19175 llvm::Type *T0 = Op0->getType();
19176 llvm::Type *T1 = Op1->getType();
19177
19178 // If the arguments are scalars, just emit a multiply
19179 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19180 if (T0->isFloatingPointTy())
19181 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19182
19183 if (T0->isIntegerTy())
19184 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19185
19186 llvm_unreachable(
19187 "Scalar dot product is only supported on ints and floats.");
19188 }
19189 // For vectors, validate types and emit the appropriate intrinsic
19190
19191 // A VectorSplat should have happened
19192 assert(T0->isVectorTy() && T1->isVectorTy() &&
19193 "Dot product of vector and scalar is not supported.");
19194
19195 auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
19196 [[maybe_unused]] auto *VecTy1 =
19197 E->getArg(1)->getType()->castAs<VectorType>();
19198
19199 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19200 "Dot product of vectors need the same element types.");
19201
19202 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19203 "Dot product requires vectors to be of the same size.");
19204
19205 return Builder.CreateIntrinsic(
19206 /*ReturnType=*/T0->getScalarType(),
19207 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19208 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19209 }
19210 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19211 Value *A = EmitScalarExpr(E->getArg(0));
19212 Value *B = EmitScalarExpr(E->getArg(1));
19213 Value *C = EmitScalarExpr(E->getArg(2));
19214
19215 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19216 return Builder.CreateIntrinsic(
19217 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19218 "hlsl.dot4add.i8packed");
19219 }
19220 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19221 Value *A = EmitScalarExpr(E->getArg(0));
19222 Value *B = EmitScalarExpr(E->getArg(1));
19223 Value *C = EmitScalarExpr(E->getArg(2));
19224
19225 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19226 return Builder.CreateIntrinsic(
19227 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19228 "hlsl.dot4add.u8packed");
19229 }
19230 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19231 Value *X = EmitScalarExpr(E->getArg(0));
19232
19233 return Builder.CreateIntrinsic(
19234 /*ReturnType=*/ConvertType(E->getType()),
19236 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19237 }
19238 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19239 Value *X = EmitScalarExpr(E->getArg(0));
19240
19241 return Builder.CreateIntrinsic(
19242 /*ReturnType=*/ConvertType(E->getType()),
19243 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
19244 nullptr, "hlsl.firstbitlow");
19245 }
19246 case Builtin::BI__builtin_hlsl_lerp: {
19247 Value *X = EmitScalarExpr(E->getArg(0));
19248 Value *Y = EmitScalarExpr(E->getArg(1));
19249 Value *S = EmitScalarExpr(E->getArg(2));
19250 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19251 llvm_unreachable("lerp operand must have a float representation");
19252 return Builder.CreateIntrinsic(
19253 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19254 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19255 }
19256 case Builtin::BI__builtin_hlsl_normalize: {
19257 Value *X = EmitScalarExpr(E->getArg(0));
19258
19259 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19260 "normalize operand must have a float representation");
19261
19262 return Builder.CreateIntrinsic(
19263 /*ReturnType=*/X->getType(),
19264 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19265 nullptr, "hlsl.normalize");
19266 }
19267 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19268 Value *X = EmitScalarExpr(E->getArg(0));
19269
19270 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19271 "degree operand must have a float representation");
19272
19273 return Builder.CreateIntrinsic(
19274 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19275 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19276 }
19277 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19278 Value *Op0 = EmitScalarExpr(E->getArg(0));
19279 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19280 llvm_unreachable("frac operand must have a float representation");
19281 return Builder.CreateIntrinsic(
19282 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19283 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19284}
19285case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19286 Value *Op0 = EmitScalarExpr(E->getArg(0));
19287 llvm::Type *Xty = Op0->getType();
19288 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19289 if (Xty->isVectorTy()) {
19290 auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
19291 retType = llvm::VectorType::get(
19292 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19293 }
19294 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19295 llvm_unreachable("isinf operand must have a float representation");
19296 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19297 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19298 }
19299 case Builtin::BI__builtin_hlsl_mad: {
19300 Value *M = EmitScalarExpr(E->getArg(0));
19301 Value *A = EmitScalarExpr(E->getArg(1));
19302 Value *B = EmitScalarExpr(E->getArg(2));
19303 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19304 return Builder.CreateIntrinsic(
19305 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19306 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19307
19308 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19309 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19310 return Builder.CreateIntrinsic(
19311 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19312 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19313
19314 Value *Mul = Builder.CreateNSWMul(M, A);
19315 return Builder.CreateNSWAdd(Mul, B);
19316 }
19317 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19318 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19319 return Builder.CreateIntrinsic(
19320 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19321 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19322
19323 Value *Mul = Builder.CreateNUWMul(M, A);
19324 return Builder.CreateNUWAdd(Mul, B);
19325 }
19326 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19327 Value *Op0 = EmitScalarExpr(E->getArg(0));
19328 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19329 llvm_unreachable("rcp operand must have a float representation");
19330 llvm::Type *Ty = Op0->getType();
19331 llvm::Type *EltTy = Ty->getScalarType();
19332 Constant *One = Ty->isVectorTy()
19333 ? ConstantVector::getSplat(
19334 ElementCount::getFixed(
19335 cast<FixedVectorType>(Ty)->getNumElements()),
19336 ConstantFP::get(EltTy, 1.0))
19337 : ConstantFP::get(EltTy, 1.0);
19338 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19339 }
19340 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19341 Value *Op0 = EmitScalarExpr(E->getArg(0));
19342 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19343 llvm_unreachable("rsqrt operand must have a float representation");
19344 return Builder.CreateIntrinsic(
19345 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19346 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19347 }
19348 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19349 Value *Op0 = EmitScalarExpr(E->getArg(0));
19350 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19351 "saturate operand must have a float representation");
19352 return Builder.CreateIntrinsic(
19353 /*ReturnType=*/Op0->getType(),
19354 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19355 nullptr, "hlsl.saturate");
19356 }
19357 case Builtin::BI__builtin_hlsl_select: {
19358 Value *OpCond = EmitScalarExpr(E->getArg(0));
19359 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19360 Value *OpTrue =
19361 RValTrue.isScalar()
19362 ? RValTrue.getScalarVal()
19363 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19364 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19365 Value *OpFalse =
19366 RValFalse.isScalar()
19367 ? RValFalse.getScalarVal()
19368 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19369
19370 Value *SelectVal =
19371 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19372 if (!RValTrue.isScalar())
19373 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19374 ReturnValue.isVolatile());
19375
19376 return SelectVal;
19377 }
19378 case Builtin::BI__builtin_hlsl_step: {
19379 Value *Op0 = EmitScalarExpr(E->getArg(0));
19380 Value *Op1 = EmitScalarExpr(E->getArg(1));
19381 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19382 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19383 "step operands must have a float representation");
19384 return Builder.CreateIntrinsic(
19385 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19386 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19387 }
19388 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19389 Value *Op = EmitScalarExpr(E->getArg(0));
19390 assert(Op->getType()->isIntegerTy(1) &&
19391 "Intrinsic WaveActiveAllTrue operand must be a bool");
19392
19393 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19394 return EmitRuntimeCall(
19395 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19396 }
19397 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19398 Value *Op = EmitScalarExpr(E->getArg(0));
19399 assert(Op->getType()->isIntegerTy(1) &&
19400 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19401
19402 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19403 return EmitRuntimeCall(
19404 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19405 }
19406 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19407 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19408 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19409 return EmitRuntimeCall(
19410 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19411 ArrayRef{OpExpr});
19412 }
19413 case Builtin::BI__builtin_hlsl_wave_active_sum: {
19414 // Due to the use of variadic arguments, explicitly retrieve argument
19415 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19416 llvm::FunctionType *FT = llvm::FunctionType::get(
19417 OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
19418 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
19419 getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
19420 E->getArg(0)->getType());
19421
19422 // Get overloaded name
19423 std::string Name =
19424 Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
19425 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19426 /*Local=*/false,
19427 /*AssumeConvergent=*/true),
19428 ArrayRef{OpExpr}, "hlsl.wave.active.sum");
19429 }
19430 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19431 // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
19432 // defined in SPIRVBuiltins.td. So instead we manually get the matching name
19433 // for the DirectX intrinsic and the demangled builtin name
19434 switch (CGM.getTarget().getTriple().getArch()) {
19435 case llvm::Triple::dxil:
19436 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19437 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19438 case llvm::Triple::spirv:
19440 llvm::FunctionType::get(IntTy, {}, false),
19441 "__hlsl_wave_get_lane_index", {}, false, true));
19442 default:
19443 llvm_unreachable(
19444 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19445 }
19446 }
19447 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19448 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19449 return EmitRuntimeCall(
19450 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19451 }
19452 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19453 // Due to the use of variadic arguments we must explicitly retrieve them and
19454 // create our function type.
19455 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19456 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19457 llvm::FunctionType *FT = llvm::FunctionType::get(
19458 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19459 false);
19460
19461 // Get overloaded name
19462 std::string Name =
19463 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19464 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19465 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19466 /*Local=*/false,
19467 /*AssumeConvergent=*/true),
19468 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19469 }
19470 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19471 auto *Arg0 = E->getArg(0);
19472 Value *Op0 = EmitScalarExpr(Arg0);
19473 llvm::Type *Xty = Op0->getType();
19474 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19475 if (Xty->isVectorTy()) {
19476 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
19477 retType = llvm::VectorType::get(
19478 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19479 }
19480 assert((Arg0->getType()->hasFloatingRepresentation() ||
19481 Arg0->getType()->hasIntegerRepresentation()) &&
19482 "sign operand must have a float or int representation");
19483
19485 Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19486 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19487 ConstantInt::get(retType, 1), "hlsl.sign");
19488 }
19489
19490 return Builder.CreateIntrinsic(
19491 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19492 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19493 }
19494 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19495 Value *Op0 = EmitScalarExpr(E->getArg(0));
19496 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19497 "radians operand must have a float representation");
19498 return Builder.CreateIntrinsic(
19499 /*ReturnType=*/Op0->getType(),
19500 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19501 nullptr, "hlsl.radians");
19502 }
19503 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19504 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19505 Value *Offset = EmitScalarExpr(E->getArg(1));
19506 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19507 return Builder.CreateIntrinsic(
19508 /*ReturnType=*/Offset->getType(),
19509 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19510 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19511 }
19512 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19513
19514 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19515 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19516 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19517 "asuint operands types mismatch");
19518 return handleHlslSplitdouble(E, this);
19519 }
19520 case Builtin::BI__builtin_hlsl_elementwise_clip:
19521 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19522 "clip operands types mismatch");
19523 return handleHlslClip(E, this);
19524 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19525 Intrinsic::ID ID =
19526 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19527 return EmitRuntimeCall(
19528 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19529 }
19530 }
19531 return nullptr;
19532}
19533
19534void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19535 const CallExpr *E) {
19536 constexpr const char *Tag = "amdgpu-as";
19537
19538 LLVMContext &Ctx = Inst->getContext();
19540 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19541 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19542 StringRef AS;
19543 if (llvm::getConstantStringInfo(V, AS)) {
19544 MMRAs.push_back({Tag, AS});
19545 // TODO: Delete the resulting unused constant?
19546 continue;
19547 }
19548 CGM.Error(E->getExprLoc(),
19549 "expected an address space name as a string literal");
19550 }
19551
19552 llvm::sort(MMRAs);
19553 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19554 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19555}
19556
19558 const CallExpr *E) {
19559 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19560 llvm::SyncScope::ID SSID;
19561 switch (BuiltinID) {
19562 case AMDGPU::BI__builtin_amdgcn_div_scale:
19563 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19564 // Translate from the intrinsic's struct return to the builtin's out
19565 // argument.
19566
19567 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19568
19569 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19570 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19571 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19572
19573 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19574 X->getType());
19575
19576 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19577
19578 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19579 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19580
19581 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19582
19583 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19584 Builder.CreateStore(FlagExt, FlagOutPtr);
19585 return Result;
19586 }
19587 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19588 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19589 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19590 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19591 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19592 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19593
19594 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19595 Src0->getType());
19596 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19597 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19598 }
19599
19600 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19601 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19602 Intrinsic::amdgcn_ds_swizzle);
19603 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19604 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19605 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19607 // Find out if any arguments are required to be integer constant
19608 // expressions.
19609 unsigned ICEArguments = 0;
19611 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19612 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19613 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19614 unsigned Size = DataTy->getPrimitiveSizeInBits();
19615 llvm::Type *IntTy =
19616 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19617 Function *F =
19618 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19619 ? Intrinsic::amdgcn_mov_dpp8
19620 : Intrinsic::amdgcn_update_dpp,
19621 IntTy);
19622 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19623 E->getNumArgs() == 2);
19624 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19625 if (InsertOld)
19626 Args.push_back(llvm::PoisonValue::get(IntTy));
19627 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19628 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19629 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19630 Size < 32) {
19631 if (!DataTy->isIntegerTy())
19632 V = Builder.CreateBitCast(
19633 V, llvm::IntegerType::get(Builder.getContext(), Size));
19634 V = Builder.CreateZExtOrBitCast(V, IntTy);
19635 }
19636 llvm::Type *ExpTy =
19637 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19638 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19639 }
19640 Value *V = Builder.CreateCall(F, Args);
19641 if (Size < 32 && !DataTy->isIntegerTy())
19642 V = Builder.CreateTrunc(
19643 V, llvm::IntegerType::get(Builder.getContext(), Size));
19644 return Builder.CreateTruncOrBitCast(V, DataTy);
19645 }
19646 case AMDGPU::BI__builtin_amdgcn_permlane16:
19647 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19648 return emitBuiltinWithOneOverloadedType<6>(
19649 *this, E,
19650 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19651 ? Intrinsic::amdgcn_permlane16
19652 : Intrinsic::amdgcn_permlanex16);
19653 case AMDGPU::BI__builtin_amdgcn_permlane64:
19654 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19655 Intrinsic::amdgcn_permlane64);
19656 case AMDGPU::BI__builtin_amdgcn_readlane:
19657 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19658 Intrinsic::amdgcn_readlane);
19659 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19660 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19661 Intrinsic::amdgcn_readfirstlane);
19662 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19663 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19664 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19665 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19666 Intrinsic::amdgcn_div_fixup);
19667 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19668 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19669 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19670 case AMDGPU::BI__builtin_amdgcn_rcp:
19671 case AMDGPU::BI__builtin_amdgcn_rcpf:
19672 case AMDGPU::BI__builtin_amdgcn_rcph:
19673 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19674 case AMDGPU::BI__builtin_amdgcn_sqrt:
19675 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19676 case AMDGPU::BI__builtin_amdgcn_sqrth:
19677 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19678 Intrinsic::amdgcn_sqrt);
19679 case AMDGPU::BI__builtin_amdgcn_rsq:
19680 case AMDGPU::BI__builtin_amdgcn_rsqf:
19681 case AMDGPU::BI__builtin_amdgcn_rsqh:
19682 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19683 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19684 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19685 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19686 Intrinsic::amdgcn_rsq_clamp);
19687 case AMDGPU::BI__builtin_amdgcn_sinf:
19688 case AMDGPU::BI__builtin_amdgcn_sinh:
19689 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19690 case AMDGPU::BI__builtin_amdgcn_cosf:
19691 case AMDGPU::BI__builtin_amdgcn_cosh:
19692 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19693 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19694 return EmitAMDGPUDispatchPtr(*this, E);
19695 case AMDGPU::BI__builtin_amdgcn_logf:
19696 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19697 case AMDGPU::BI__builtin_amdgcn_exp2f:
19698 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19699 Intrinsic::amdgcn_exp2);
19700 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19701 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19702 Intrinsic::amdgcn_log_clamp);
19703 case AMDGPU::BI__builtin_amdgcn_ldexp:
19704 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19705 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19706 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19707 llvm::Function *F =
19708 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19709 return Builder.CreateCall(F, {Src0, Src1});
19710 }
19711 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19712 // The raw instruction has a different behavior for out of bounds exponent
19713 // values (implicit truncation instead of saturate to short_min/short_max).
19714 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19715 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19716 llvm::Function *F =
19717 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19718 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19719 }
19720 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19721 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19722 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19723 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19724 Intrinsic::amdgcn_frexp_mant);
19725 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19726 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19727 Value *Src0 = EmitScalarExpr(E->getArg(0));
19728 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19729 { Builder.getInt32Ty(), Src0->getType() });
19730 return Builder.CreateCall(F, Src0);
19731 }
19732 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19733 Value *Src0 = EmitScalarExpr(E->getArg(0));
19734 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19735 { Builder.getInt16Ty(), Src0->getType() });
19736 return Builder.CreateCall(F, Src0);
19737 }
19738 case AMDGPU::BI__builtin_amdgcn_fract:
19739 case AMDGPU::BI__builtin_amdgcn_fractf:
19740 case AMDGPU::BI__builtin_amdgcn_fracth:
19741 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19742 Intrinsic::amdgcn_fract);
19743 case AMDGPU::BI__builtin_amdgcn_lerp:
19744 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19745 Intrinsic::amdgcn_lerp);
19746 case AMDGPU::BI__builtin_amdgcn_ubfe:
19747 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19748 Intrinsic::amdgcn_ubfe);
19749 case AMDGPU::BI__builtin_amdgcn_sbfe:
19750 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19751 Intrinsic::amdgcn_sbfe);
19752 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19753 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19754 llvm::Type *ResultType = ConvertType(E->getType());
19755 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19756 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19757 return Builder.CreateCall(F, { Src });
19758 }
19759 case AMDGPU::BI__builtin_amdgcn_uicmp:
19760 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19761 case AMDGPU::BI__builtin_amdgcn_sicmp:
19762 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19763 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19764 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19765 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19766
19767 // FIXME-GFX10: How should 32 bit mask be handled?
19768 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19769 { Builder.getInt64Ty(), Src0->getType() });
19770 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19771 }
19772 case AMDGPU::BI__builtin_amdgcn_fcmp:
19773 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19774 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19775 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19776 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19777
19778 // FIXME-GFX10: How should 32 bit mask be handled?
19779 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19780 { Builder.getInt64Ty(), Src0->getType() });
19781 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19782 }
19783 case AMDGPU::BI__builtin_amdgcn_class:
19784 case AMDGPU::BI__builtin_amdgcn_classf:
19785 case AMDGPU::BI__builtin_amdgcn_classh:
19786 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19787 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19788 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19789 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19790 Intrinsic::amdgcn_fmed3);
19791 case AMDGPU::BI__builtin_amdgcn_ds_append:
19792 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19793 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19794 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19795 Value *Src0 = EmitScalarExpr(E->getArg(0));
19796 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19797 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19798 }
19799 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19800 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19801 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19802 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19803 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19804 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19805 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19806 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19807 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19808 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19809 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19810 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19811 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19812 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19813 Intrinsic::ID IID;
19814 switch (BuiltinID) {
19815 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19816 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19817 IID = Intrinsic::amdgcn_global_load_tr_b64;
19818 break;
19819 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19820 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19821 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19822 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19823 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19824 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19825 IID = Intrinsic::amdgcn_global_load_tr_b128;
19826 break;
19827 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19828 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19829 break;
19830 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19831 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19832 break;
19833 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19834 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19835 break;
19836 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19837 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19838 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19839 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19840 break;
19841 }
19842 llvm::Type *LoadTy = ConvertType(E->getType());
19843 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19844 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19845 return Builder.CreateCall(F, {Addr});
19846 }
19847 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19848 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19849 {llvm::Type::getInt64Ty(getLLVMContext())});
19850 return Builder.CreateCall(F);
19851 }
19852 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19853 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19854 {llvm::Type::getInt64Ty(getLLVMContext())});
19855 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19856 return Builder.CreateCall(F, {Env});
19857 }
19858 case AMDGPU::BI__builtin_amdgcn_read_exec:
19859 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19860 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19861 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19862 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19863 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19864 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19865 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19866 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19867 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19868 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19869 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19870 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19871 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19872 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19873 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19874
19875 // The builtins take these arguments as vec4 where the last element is
19876 // ignored. The intrinsic takes them as vec3.
19877 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19878 ArrayRef<int>{0, 1, 2});
19879 RayDir =
19880 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19881 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19882 ArrayRef<int>{0, 1, 2});
19883
19884 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19885 {NodePtr->getType(), RayDir->getType()});
19886 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19887 RayInverseDir, TextureDescr});
19888 }
19889
19890 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19892 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19893 Args.push_back(EmitScalarExpr(E->getArg(i)));
19894
19895 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19896 Value *Call = Builder.CreateCall(F, Args);
19897 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19898 Value *A = Builder.CreateExtractValue(Call, 1);
19899 llvm::Type *RetTy = ConvertType(E->getType());
19900 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19901 (uint64_t)0);
19902 return Builder.CreateInsertElement(I0, A, 1);
19903 }
19904 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19905 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19906 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
19908 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19909 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19910 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
19911 {VT, VT});
19912
19914 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
19915 Args.push_back(EmitScalarExpr(E->getArg(I)));
19916 return Builder.CreateCall(F, Args);
19917 }
19918 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19919 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19920 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19921 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19922 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19923 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19924 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19925 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19926 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19927 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19928 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19929 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19930 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19931 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19932 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19933 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19934 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19935 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19936 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19937 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19938 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19939 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19940 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19941 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19942 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19943 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19944 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19945 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19946 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19947 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19948 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19949 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19950 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19951 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19952 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19953 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19954 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19955 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19956 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19959 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19960 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19961 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19962 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19963 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19964 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19965 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19966 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19967 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19968 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19969 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19970 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19971 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19972 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19973 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19974 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19975 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19976 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19977 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19978
19979 // These operations perform a matrix multiplication and accumulation of
19980 // the form:
19981 // D = A * B + C
19982 // We need to specify one type for matrices AB and one for matrices CD.
19983 // Sparse matrix operations can have different types for A and B as well as
19984 // an additional type for sparsity index.
19985 // Destination type should be put before types used for source operands.
19986 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
19987 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
19988 // There is no need for the variable opsel argument, so always set it to
19989 // "false".
19990 bool AppendFalseForOpselArg = false;
19991 unsigned BuiltinWMMAOp;
19992
19993 switch (BuiltinID) {
19994 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19995 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19996 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19997 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19998 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19999 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20000 break;
20001 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20002 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20003 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20004 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20005 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20006 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20007 break;
20008 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20009 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20010 AppendFalseForOpselArg = true;
20011 [[fallthrough]];
20012 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20014 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20015 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20016 break;
20017 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20019 AppendFalseForOpselArg = true;
20020 [[fallthrough]];
20021 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20022 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20023 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20024 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20025 break;
20026 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20027 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20028 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20029 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20030 break;
20031 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20032 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20033 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20034 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20035 break;
20036 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20037 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20039 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20040 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20041 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20042 break;
20043 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20044 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20045 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20046 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20047 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20048 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20049 break;
20050 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20051 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20052 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20053 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20054 break;
20055 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20056 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20057 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20058 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20059 break;
20060 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20062 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20063 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20064 break;
20065 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20066 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20067 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20068 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20069 break;
20070 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20071 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20072 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20073 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20074 break;
20075 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20076 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20077 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20078 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20079 break;
20080 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20081 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20082 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20083 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20084 break;
20085 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20086 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20087 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20088 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20089 break;
20090 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20091 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20092 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20093 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20094 break;
20095 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20096 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20097 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20098 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20099 break;
20100 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20101 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20102 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20103 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20104 break;
20105 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20106 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20107 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20108 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20109 break;
20110 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20111 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20112 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20113 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20114 break;
20115 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20116 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20117 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20118 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20119 break;
20120 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20121 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20122 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20123 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20124 break;
20125 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20126 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20127 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20128 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20129 break;
20130 }
20131
20133 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20134 Args.push_back(EmitScalarExpr(E->getArg(i)));
20135 if (AppendFalseForOpselArg)
20136 Args.push_back(Builder.getFalse());
20137
20139 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20140 ArgTypes.push_back(Args[ArgIdx]->getType());
20141
20142 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20143 return Builder.CreateCall(F, Args);
20144 }
20145
20146 // amdgcn workitem
20147 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20148 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20149 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20150 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20151 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20152 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20153
20154 // amdgcn workgroup size
20155 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20156 return EmitAMDGPUWorkGroupSize(*this, 0);
20157 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20158 return EmitAMDGPUWorkGroupSize(*this, 1);
20159 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20160 return EmitAMDGPUWorkGroupSize(*this, 2);
20161
20162 // amdgcn grid size
20163 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20164 return EmitAMDGPUGridSize(*this, 0);
20165 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20166 return EmitAMDGPUGridSize(*this, 1);
20167 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20168 return EmitAMDGPUGridSize(*this, 2);
20169
20170 // r600 intrinsics
20171 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20172 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20173 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20174 Intrinsic::r600_recipsqrt_ieee);
20175 case AMDGPU::BI__builtin_r600_read_tidig_x:
20176 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20177 case AMDGPU::BI__builtin_r600_read_tidig_y:
20178 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20179 case AMDGPU::BI__builtin_r600_read_tidig_z:
20180 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20181 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20182 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20183 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20184 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20185 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20186 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20187 }
20188 case AMDGPU::BI__builtin_amdgcn_fence: {
20190 EmitScalarExpr(E->getArg(1)), AO, SSID);
20191 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20192 if (E->getNumArgs() > 2)
20194 return Fence;
20195 }
20196 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20197 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20198 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20199 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20200 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20201 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20202 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20203 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20204 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20205 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20206 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20207 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20208 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20209 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20210 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20211 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20212 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20213 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20214 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20215 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20216 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20217 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20218 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20219 llvm::AtomicRMWInst::BinOp BinOp;
20220 switch (BuiltinID) {
20221 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20222 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20223 BinOp = llvm::AtomicRMWInst::UIncWrap;
20224 break;
20225 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20226 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20227 BinOp = llvm::AtomicRMWInst::UDecWrap;
20228 break;
20229 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20230 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20231 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20232 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20233 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20234 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20235 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20236 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20237 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20238 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20239 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20240 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20241 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20242 BinOp = llvm::AtomicRMWInst::FAdd;
20243 break;
20244 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20245 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20246 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20247 BinOp = llvm::AtomicRMWInst::FMin;
20248 break;
20249 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20250 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20251 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20252 BinOp = llvm::AtomicRMWInst::FMax;
20253 break;
20254 }
20255
20256 Address Ptr = CheckAtomicAlignment(*this, E);
20257 Value *Val = EmitScalarExpr(E->getArg(1));
20258 llvm::Type *OrigTy = Val->getType();
20259 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20260
20261 bool Volatile;
20262
20263 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20264 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20265 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20266 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20267 Volatile =
20268 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20269 } else {
20270 // Infer volatile from the passed type.
20271 Volatile =
20273 }
20274
20275 if (E->getNumArgs() >= 4) {
20276 // Some of the builtins have explicit ordering and scope arguments.
20278 EmitScalarExpr(E->getArg(3)), AO, SSID);
20279 } else {
20280 // Most of the builtins do not have syncscope/order arguments. For DS
20281 // atomics the scope doesn't really matter, as they implicitly operate at
20282 // workgroup scope.
20283 //
20284 // The global/flat cases need to use agent scope to consistently produce
20285 // the native instruction instead of a cmpxchg expansion.
20286 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20287 AO = AtomicOrdering::Monotonic;
20288
20289 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20290 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20291 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20292 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20293 llvm::Type *V2BF16Ty = FixedVectorType::get(
20294 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20295 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20296 }
20297 }
20298
20299 llvm::AtomicRMWInst *RMW =
20300 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20301 if (Volatile)
20302 RMW->setVolatile(true);
20303
20304 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20305 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20306 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20307 // instruction for flat and global operations.
20308 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20309 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20310
20311 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20312 // instruction, but this only matters for float fadd.
20313 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20314 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20315 }
20316
20317 return Builder.CreateBitCast(RMW, OrigTy);
20318 }
20319 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20320 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20321 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20322 llvm::Type *ResultType = ConvertType(E->getType());
20323 // s_sendmsg_rtn is mangled using return type only.
20324 Function *F =
20325 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20326 return Builder.CreateCall(F, {Arg});
20327 }
20328 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20329 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20330 // Because builtin types are limited, and the intrinsic uses a struct/pair
20331 // output, marshal the pair-of-i32 to <2 x i32>.
20332 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20333 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20334 Value *FI = EmitScalarExpr(E->getArg(2));
20335 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20336 Function *F =
20337 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20338 ? Intrinsic::amdgcn_permlane16_swap
20339 : Intrinsic::amdgcn_permlane32_swap);
20340 llvm::CallInst *Call =
20341 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20342
20343 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20344 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20345
20346 llvm::Type *ResultType = ConvertType(E->getType());
20347
20348 llvm::Value *Insert0 = Builder.CreateInsertElement(
20349 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20350 llvm::Value *AsVector =
20351 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20352 return AsVector;
20353 }
20354 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20355 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20356 return emitBuiltinWithOneOverloadedType<4>(*this, E,
20357 Intrinsic::amdgcn_bitop3);
20358 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20359 return emitBuiltinWithOneOverloadedType<4>(
20360 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20361 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20362 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20363 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20364 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20365 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20366 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20367 return emitBuiltinWithOneOverloadedType<5>(
20368 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20369 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20370 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20371 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20372 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20373 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20374 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20375 llvm::Type *RetTy = nullptr;
20376 switch (BuiltinID) {
20377 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20378 RetTy = Int8Ty;
20379 break;
20380 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20381 RetTy = Int16Ty;
20382 break;
20383 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20384 RetTy = Int32Ty;
20385 break;
20386 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20387 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20388 break;
20389 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20390 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20391 break;
20392 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20393 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20394 break;
20395 }
20396 Function *F =
20397 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20398 return Builder.CreateCall(
20399 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20400 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20401 }
20402 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20403 return emitBuiltinWithOneOverloadedType<2>(
20404 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20405 default:
20406 return nullptr;
20407 }
20408}
20409
20411 const CallExpr *E) {
20412 switch (BuiltinID) {
20413 case SPIRV::BI__builtin_spirv_distance: {
20414 Value *X = EmitScalarExpr(E->getArg(0));
20415 Value *Y = EmitScalarExpr(E->getArg(1));
20416 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20417 E->getArg(1)->getType()->hasFloatingRepresentation() &&
20418 "Distance operands must have a float representation");
20419 assert(E->getArg(0)->getType()->isVectorType() &&
20420 E->getArg(1)->getType()->isVectorType() &&
20421 "Distance operands must be a vector");
20422 return Builder.CreateIntrinsic(
20423 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
20424 ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
20425 }
20426 case SPIRV::BI__builtin_spirv_length: {
20427 Value *X = EmitScalarExpr(E->getArg(0));
20428 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20429 "length operand must have a float representation");
20430 assert(E->getArg(0)->getType()->isVectorType() &&
20431 "length operand must be a vector");
20432 return Builder.CreateIntrinsic(
20433 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_length,
20434 ArrayRef<Value *>{X}, nullptr, "spv.length");
20435 }
20436 }
20437 return nullptr;
20438}
20439
20440/// Handle a SystemZ function in which the final argument is a pointer
20441/// to an int that receives the post-instruction CC value. At the LLVM level
20442/// this is represented as a function that returns a {result, cc} pair.
20444 unsigned IntrinsicID,
20445 const CallExpr *E) {
20446 unsigned NumArgs = E->getNumArgs() - 1;
20447 SmallVector<Value *, 8> Args(NumArgs);
20448 for (unsigned I = 0; I < NumArgs; ++I)
20449 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20450 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20451 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20452 Value *Call = CGF.Builder.CreateCall(F, Args);
20453 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20454 CGF.Builder.CreateStore(CC, CCPtr);
20455 return CGF.Builder.CreateExtractValue(Call, 0);
20456}
20457
20459 const CallExpr *E) {
20460 switch (BuiltinID) {
20461 case SystemZ::BI__builtin_tbegin: {
20462 Value *TDB = EmitScalarExpr(E->getArg(0));
20463 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20464 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20465 return Builder.CreateCall(F, {TDB, Control});
20466 }
20467 case SystemZ::BI__builtin_tbegin_nofloat: {
20468 Value *TDB = EmitScalarExpr(E->getArg(0));
20469 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20470 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20471 return Builder.CreateCall(F, {TDB, Control});
20472 }
20473 case SystemZ::BI__builtin_tbeginc: {
20474 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20475 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20476 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20477 return Builder.CreateCall(F, {TDB, Control});
20478 }
20479 case SystemZ::BI__builtin_tabort: {
20480 Value *Data = EmitScalarExpr(E->getArg(0));
20481 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20482 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20483 }
20484 case SystemZ::BI__builtin_non_tx_store: {
20485 Value *Address = EmitScalarExpr(E->getArg(0));
20486 Value *Data = EmitScalarExpr(E->getArg(1));
20487 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20488 return Builder.CreateCall(F, {Data, Address});
20489 }
20490
20491 // Vector builtins. Note that most vector builtins are mapped automatically
20492 // to target-specific LLVM intrinsics. The ones handled specially here can
20493 // be represented via standard LLVM IR, which is preferable to enable common
20494 // LLVM optimizations.
20495
20496 case SystemZ::BI__builtin_s390_vclzb:
20497 case SystemZ::BI__builtin_s390_vclzh:
20498 case SystemZ::BI__builtin_s390_vclzf:
20499 case SystemZ::BI__builtin_s390_vclzg:
20500 case SystemZ::BI__builtin_s390_vclzq: {
20501 llvm::Type *ResultType = ConvertType(E->getType());
20502 Value *X = EmitScalarExpr(E->getArg(0));
20503 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20504 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20505 return Builder.CreateCall(F, {X, Undef});
20506 }
20507
20508 case SystemZ::BI__builtin_s390_vctzb:
20509 case SystemZ::BI__builtin_s390_vctzh:
20510 case SystemZ::BI__builtin_s390_vctzf:
20511 case SystemZ::BI__builtin_s390_vctzg:
20512 case SystemZ::BI__builtin_s390_vctzq: {
20513 llvm::Type *ResultType = ConvertType(E->getType());
20514 Value *X = EmitScalarExpr(E->getArg(0));
20515 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20516 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20517 return Builder.CreateCall(F, {X, Undef});
20518 }
20519
20520 case SystemZ::BI__builtin_s390_verllb:
20521 case SystemZ::BI__builtin_s390_verllh:
20522 case SystemZ::BI__builtin_s390_verllf:
20523 case SystemZ::BI__builtin_s390_verllg: {
20524 llvm::Type *ResultType = ConvertType(E->getType());
20525 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20526 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20527 // Splat scalar rotate amount to vector type.
20528 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20529 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20530 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20531 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20532 return Builder.CreateCall(F, { Src, Src, Amt });
20533 }
20534
20535 case SystemZ::BI__builtin_s390_verllvb:
20536 case SystemZ::BI__builtin_s390_verllvh:
20537 case SystemZ::BI__builtin_s390_verllvf:
20538 case SystemZ::BI__builtin_s390_verllvg: {
20539 llvm::Type *ResultType = ConvertType(E->getType());
20540 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20541 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20542 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20543 return Builder.CreateCall(F, { Src, Src, Amt });
20544 }
20545
20546 case SystemZ::BI__builtin_s390_vfsqsb:
20547 case SystemZ::BI__builtin_s390_vfsqdb: {
20548 llvm::Type *ResultType = ConvertType(E->getType());
20549 Value *X = EmitScalarExpr(E->getArg(0));
20550 if (Builder.getIsFPConstrained()) {
20551 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20552 return Builder.CreateConstrainedFPCall(F, { X });
20553 } else {
20554 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20555 return Builder.CreateCall(F, X);
20556 }
20557 }
20558 case SystemZ::BI__builtin_s390_vfmasb:
20559 case SystemZ::BI__builtin_s390_vfmadb: {
20560 llvm::Type *ResultType = ConvertType(E->getType());
20561 Value *X = EmitScalarExpr(E->getArg(0));
20562 Value *Y = EmitScalarExpr(E->getArg(1));
20563 Value *Z = EmitScalarExpr(E->getArg(2));
20564 if (Builder.getIsFPConstrained()) {
20565 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20566 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20567 } else {
20568 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20569 return Builder.CreateCall(F, {X, Y, Z});
20570 }
20571 }
20572 case SystemZ::BI__builtin_s390_vfmssb:
20573 case SystemZ::BI__builtin_s390_vfmsdb: {
20574 llvm::Type *ResultType = ConvertType(E->getType());
20575 Value *X = EmitScalarExpr(E->getArg(0));
20576 Value *Y = EmitScalarExpr(E->getArg(1));
20577 Value *Z = EmitScalarExpr(E->getArg(2));
20578 if (Builder.getIsFPConstrained()) {
20579 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20580 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20581 } else {
20582 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20583 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20584 }
20585 }
20586 case SystemZ::BI__builtin_s390_vfnmasb:
20587 case SystemZ::BI__builtin_s390_vfnmadb: {
20588 llvm::Type *ResultType = ConvertType(E->getType());
20589 Value *X = EmitScalarExpr(E->getArg(0));
20590 Value *Y = EmitScalarExpr(E->getArg(1));
20591 Value *Z = EmitScalarExpr(E->getArg(2));
20592 if (Builder.getIsFPConstrained()) {
20593 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20594 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20595 } else {
20596 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20597 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20598 }
20599 }
20600 case SystemZ::BI__builtin_s390_vfnmssb:
20601 case SystemZ::BI__builtin_s390_vfnmsdb: {
20602 llvm::Type *ResultType = ConvertType(E->getType());
20603 Value *X = EmitScalarExpr(E->getArg(0));
20604 Value *Y = EmitScalarExpr(E->getArg(1));
20605 Value *Z = EmitScalarExpr(E->getArg(2));
20606 if (Builder.getIsFPConstrained()) {
20607 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20608 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20609 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20610 } else {
20611 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20612 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20613 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20614 }
20615 }
20616 case SystemZ::BI__builtin_s390_vflpsb:
20617 case SystemZ::BI__builtin_s390_vflpdb: {
20618 llvm::Type *ResultType = ConvertType(E->getType());
20619 Value *X = EmitScalarExpr(E->getArg(0));
20620 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20621 return Builder.CreateCall(F, X);
20622 }
20623 case SystemZ::BI__builtin_s390_vflnsb:
20624 case SystemZ::BI__builtin_s390_vflndb: {
20625 llvm::Type *ResultType = ConvertType(E->getType());
20626 Value *X = EmitScalarExpr(E->getArg(0));
20627 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20628 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20629 }
20630 case SystemZ::BI__builtin_s390_vfisb:
20631 case SystemZ::BI__builtin_s390_vfidb: {
20632 llvm::Type *ResultType = ConvertType(E->getType());
20633 Value *X = EmitScalarExpr(E->getArg(0));
20634 // Constant-fold the M4 and M5 mask arguments.
20635 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20636 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20637 // Check whether this instance can be represented via a LLVM standard
20638 // intrinsic. We only support some combinations of M4 and M5.
20639 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20640 Intrinsic::ID CI;
20641 switch (M4.getZExtValue()) {
20642 default: break;
20643 case 0: // IEEE-inexact exception allowed
20644 switch (M5.getZExtValue()) {
20645 default: break;
20646 case 0: ID = Intrinsic::rint;
20647 CI = Intrinsic::experimental_constrained_rint; break;
20648 }
20649 break;
20650 case 4: // IEEE-inexact exception suppressed
20651 switch (M5.getZExtValue()) {
20652 default: break;
20653 case 0: ID = Intrinsic::nearbyint;
20654 CI = Intrinsic::experimental_constrained_nearbyint; break;
20655 case 1: ID = Intrinsic::round;
20656 CI = Intrinsic::experimental_constrained_round; break;
20657 case 5: ID = Intrinsic::trunc;
20658 CI = Intrinsic::experimental_constrained_trunc; break;
20659 case 6: ID = Intrinsic::ceil;
20660 CI = Intrinsic::experimental_constrained_ceil; break;
20661 case 7: ID = Intrinsic::floor;
20662 CI = Intrinsic::experimental_constrained_floor; break;
20663 }
20664 break;
20665 }
20666 if (ID != Intrinsic::not_intrinsic) {
20667 if (Builder.getIsFPConstrained()) {
20668 Function *F = CGM.getIntrinsic(CI, ResultType);
20669 return Builder.CreateConstrainedFPCall(F, X);
20670 } else {
20671 Function *F = CGM.getIntrinsic(ID, ResultType);
20672 return Builder.CreateCall(F, X);
20673 }
20674 }
20675 switch (BuiltinID) { // FIXME: constrained version?
20676 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20677 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20678 default: llvm_unreachable("Unknown BuiltinID");
20679 }
20680 Function *F = CGM.getIntrinsic(ID);
20681 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20682 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20683 return Builder.CreateCall(F, {X, M4Value, M5Value});
20684 }
20685 case SystemZ::BI__builtin_s390_vfmaxsb:
20686 case SystemZ::BI__builtin_s390_vfmaxdb: {
20687 llvm::Type *ResultType = ConvertType(E->getType());
20688 Value *X = EmitScalarExpr(E->getArg(0));
20689 Value *Y = EmitScalarExpr(E->getArg(1));
20690 // Constant-fold the M4 mask argument.
20691 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20692 // Check whether this instance can be represented via a LLVM standard
20693 // intrinsic. We only support some values of M4.
20694 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20695 Intrinsic::ID CI;
20696 switch (M4.getZExtValue()) {
20697 default: break;
20698 case 4: ID = Intrinsic::maxnum;
20699 CI = Intrinsic::experimental_constrained_maxnum; break;
20700 }
20701 if (ID != Intrinsic::not_intrinsic) {
20702 if (Builder.getIsFPConstrained()) {
20703 Function *F = CGM.getIntrinsic(CI, ResultType);
20704 return Builder.CreateConstrainedFPCall(F, {X, Y});
20705 } else {
20706 Function *F = CGM.getIntrinsic(ID, ResultType);
20707 return Builder.CreateCall(F, {X, Y});
20708 }
20709 }
20710 switch (BuiltinID) {
20711 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20712 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20713 default: llvm_unreachable("Unknown BuiltinID");
20714 }
20715 Function *F = CGM.getIntrinsic(ID);
20716 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20717 return Builder.CreateCall(F, {X, Y, M4Value});
20718 }
20719 case SystemZ::BI__builtin_s390_vfminsb:
20720 case SystemZ::BI__builtin_s390_vfmindb: {
20721 llvm::Type *ResultType = ConvertType(E->getType());
20722 Value *X = EmitScalarExpr(E->getArg(0));
20723 Value *Y = EmitScalarExpr(E->getArg(1));
20724 // Constant-fold the M4 mask argument.
20725 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20726 // Check whether this instance can be represented via a LLVM standard
20727 // intrinsic. We only support some values of M4.
20728 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20729 Intrinsic::ID CI;
20730 switch (M4.getZExtValue()) {
20731 default: break;
20732 case 4: ID = Intrinsic::minnum;
20733 CI = Intrinsic::experimental_constrained_minnum; break;
20734 }
20735 if (ID != Intrinsic::not_intrinsic) {
20736 if (Builder.getIsFPConstrained()) {
20737 Function *F = CGM.getIntrinsic(CI, ResultType);
20738 return Builder.CreateConstrainedFPCall(F, {X, Y});
20739 } else {
20740 Function *F = CGM.getIntrinsic(ID, ResultType);
20741 return Builder.CreateCall(F, {X, Y});
20742 }
20743 }
20744 switch (BuiltinID) {
20745 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20746 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20747 default: llvm_unreachable("Unknown BuiltinID");
20748 }
20749 Function *F = CGM.getIntrinsic(ID);
20750 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20751 return Builder.CreateCall(F, {X, Y, M4Value});
20752 }
20753
20754 case SystemZ::BI__builtin_s390_vlbrh:
20755 case SystemZ::BI__builtin_s390_vlbrf:
20756 case SystemZ::BI__builtin_s390_vlbrg:
20757 case SystemZ::BI__builtin_s390_vlbrq: {
20758 llvm::Type *ResultType = ConvertType(E->getType());
20759 Value *X = EmitScalarExpr(E->getArg(0));
20760 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20761 return Builder.CreateCall(F, X);
20762 }
20763
20764 // Vector intrinsics that output the post-instruction CC value.
20765
20766#define INTRINSIC_WITH_CC(NAME) \
20767 case SystemZ::BI__builtin_##NAME: \
20768 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20769
20770 INTRINSIC_WITH_CC(s390_vpkshs);
20771 INTRINSIC_WITH_CC(s390_vpksfs);
20772 INTRINSIC_WITH_CC(s390_vpksgs);
20773
20774 INTRINSIC_WITH_CC(s390_vpklshs);
20775 INTRINSIC_WITH_CC(s390_vpklsfs);
20776 INTRINSIC_WITH_CC(s390_vpklsgs);
20777
20778 INTRINSIC_WITH_CC(s390_vceqbs);
20779 INTRINSIC_WITH_CC(s390_vceqhs);
20780 INTRINSIC_WITH_CC(s390_vceqfs);
20781 INTRINSIC_WITH_CC(s390_vceqgs);
20782 INTRINSIC_WITH_CC(s390_vceqqs);
20783
20784 INTRINSIC_WITH_CC(s390_vchbs);
20785 INTRINSIC_WITH_CC(s390_vchhs);
20786 INTRINSIC_WITH_CC(s390_vchfs);
20787 INTRINSIC_WITH_CC(s390_vchgs);
20788 INTRINSIC_WITH_CC(s390_vchqs);
20789
20790 INTRINSIC_WITH_CC(s390_vchlbs);
20791 INTRINSIC_WITH_CC(s390_vchlhs);
20792 INTRINSIC_WITH_CC(s390_vchlfs);
20793 INTRINSIC_WITH_CC(s390_vchlgs);
20794 INTRINSIC_WITH_CC(s390_vchlqs);
20795
20796 INTRINSIC_WITH_CC(s390_vfaebs);
20797 INTRINSIC_WITH_CC(s390_vfaehs);
20798 INTRINSIC_WITH_CC(s390_vfaefs);
20799
20800 INTRINSIC_WITH_CC(s390_vfaezbs);
20801 INTRINSIC_WITH_CC(s390_vfaezhs);
20802 INTRINSIC_WITH_CC(s390_vfaezfs);
20803
20804 INTRINSIC_WITH_CC(s390_vfeebs);
20805 INTRINSIC_WITH_CC(s390_vfeehs);
20806 INTRINSIC_WITH_CC(s390_vfeefs);
20807
20808 INTRINSIC_WITH_CC(s390_vfeezbs);
20809 INTRINSIC_WITH_CC(s390_vfeezhs);
20810 INTRINSIC_WITH_CC(s390_vfeezfs);
20811
20812 INTRINSIC_WITH_CC(s390_vfenebs);
20813 INTRINSIC_WITH_CC(s390_vfenehs);
20814 INTRINSIC_WITH_CC(s390_vfenefs);
20815
20816 INTRINSIC_WITH_CC(s390_vfenezbs);
20817 INTRINSIC_WITH_CC(s390_vfenezhs);
20818 INTRINSIC_WITH_CC(s390_vfenezfs);
20819
20820 INTRINSIC_WITH_CC(s390_vistrbs);
20821 INTRINSIC_WITH_CC(s390_vistrhs);
20822 INTRINSIC_WITH_CC(s390_vistrfs);
20823
20824 INTRINSIC_WITH_CC(s390_vstrcbs);
20825 INTRINSIC_WITH_CC(s390_vstrchs);
20826 INTRINSIC_WITH_CC(s390_vstrcfs);
20827
20828 INTRINSIC_WITH_CC(s390_vstrczbs);
20829 INTRINSIC_WITH_CC(s390_vstrczhs);
20830 INTRINSIC_WITH_CC(s390_vstrczfs);
20831
20832 INTRINSIC_WITH_CC(s390_vfcesbs);
20833 INTRINSIC_WITH_CC(s390_vfcedbs);
20834 INTRINSIC_WITH_CC(s390_vfchsbs);
20835 INTRINSIC_WITH_CC(s390_vfchdbs);
20836 INTRINSIC_WITH_CC(s390_vfchesbs);
20837 INTRINSIC_WITH_CC(s390_vfchedbs);
20838
20839 INTRINSIC_WITH_CC(s390_vftcisb);
20840 INTRINSIC_WITH_CC(s390_vftcidb);
20841
20842 INTRINSIC_WITH_CC(s390_vstrsb);
20843 INTRINSIC_WITH_CC(s390_vstrsh);
20844 INTRINSIC_WITH_CC(s390_vstrsf);
20845
20846 INTRINSIC_WITH_CC(s390_vstrszb);
20847 INTRINSIC_WITH_CC(s390_vstrszh);
20848 INTRINSIC_WITH_CC(s390_vstrszf);
20849
20850#undef INTRINSIC_WITH_CC
20851
20852 default:
20853 return nullptr;
20854 }
20855}
20856
20857namespace {
20858// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
/// Describes how a single MMA load/store builtin maps onto LLVM intrinsics.
///
/// Values of this type are created with positional aggregate initialization
/// in the order {NumResults, IID_col, IID_row}, so the member order is part of
/// the contract.
struct NVPTXMmaLdstInfo {
  /// Number of elements to load/store.
  unsigned NumResults;
  /// Intrinsic ID of the 'col' variant; 0 if that layout is unsupported.
  unsigned IID_col;
  /// Intrinsic ID of the 'row' variant; 0 if that layout is unsupported.
  unsigned IID_row;
};
20865
20866#define MMA_INTR(geom_op_type, layout) \
20867 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
20868#define MMA_LDST(n, geom_op_type) \
20869 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
20870
20871static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
20872 switch (BuiltinID) {
20873 // FP MMA loads
20874 case NVPTX::BI__hmma_m16n16k16_ld_a:
20875 return MMA_LDST(8, m16n16k16_load_a_f16);
20876 case NVPTX::BI__hmma_m16n16k16_ld_b:
20877 return MMA_LDST(8, m16n16k16_load_b_f16);
20878 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20879 return MMA_LDST(4, m16n16k16_load_c_f16);
20880 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20881 return MMA_LDST(8, m16n16k16_load_c_f32);
20882 case NVPTX::BI__hmma_m32n8k16_ld_a:
20883 return MMA_LDST(8, m32n8k16_load_a_f16);
20884 case NVPTX::BI__hmma_m32n8k16_ld_b:
20885 return MMA_LDST(8, m32n8k16_load_b_f16);
20886 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20887 return MMA_LDST(4, m32n8k16_load_c_f16);
20888 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20889 return MMA_LDST(8, m32n8k16_load_c_f32);
20890 case NVPTX::BI__hmma_m8n32k16_ld_a:
20891 return MMA_LDST(8, m8n32k16_load_a_f16);
20892 case NVPTX::BI__hmma_m8n32k16_ld_b:
20893 return MMA_LDST(8, m8n32k16_load_b_f16);
20894 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20895 return MMA_LDST(4, m8n32k16_load_c_f16);
20896 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20897 return MMA_LDST(8, m8n32k16_load_c_f32);
20898
20899 // Integer MMA loads
20900 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20901 return MMA_LDST(2, m16n16k16_load_a_s8);
20902 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20903 return MMA_LDST(2, m16n16k16_load_a_u8);
20904 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20905 return MMA_LDST(2, m16n16k16_load_b_s8);
20906 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20907 return MMA_LDST(2, m16n16k16_load_b_u8);
20908 case NVPTX::BI__imma_m16n16k16_ld_c:
20909 return MMA_LDST(8, m16n16k16_load_c_s32);
20910 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20911 return MMA_LDST(4, m32n8k16_load_a_s8);
20912 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20913 return MMA_LDST(4, m32n8k16_load_a_u8);
20914 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20915 return MMA_LDST(1, m32n8k16_load_b_s8);
20916 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20917 return MMA_LDST(1, m32n8k16_load_b_u8);
20918 case NVPTX::BI__imma_m32n8k16_ld_c:
20919 return MMA_LDST(8, m32n8k16_load_c_s32);
20920 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20921 return MMA_LDST(1, m8n32k16_load_a_s8);
20922 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20923 return MMA_LDST(1, m8n32k16_load_a_u8);
20924 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20925 return MMA_LDST(4, m8n32k16_load_b_s8);
20926 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20927 return MMA_LDST(4, m8n32k16_load_b_u8);
20928 case NVPTX::BI__imma_m8n32k16_ld_c:
20929 return MMA_LDST(8, m8n32k16_load_c_s32);
20930
20931 // Sub-integer MMA loads.
20932 // Only row/col layout is supported by A/B fragments.
20933 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20934 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
20935 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20936 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
20937 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20938 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
20939 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20940 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
20941 case NVPTX::BI__imma_m8n8k32_ld_c:
20942 return MMA_LDST(2, m8n8k32_load_c_s32);
20943 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20944 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
20945 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20946 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
20947 case NVPTX::BI__bmma_m8n8k128_ld_c:
20948 return MMA_LDST(2, m8n8k128_load_c_s32);
20949
20950 // Double MMA loads
20951 case NVPTX::BI__dmma_m8n8k4_ld_a:
20952 return MMA_LDST(1, m8n8k4_load_a_f64);
20953 case NVPTX::BI__dmma_m8n8k4_ld_b:
20954 return MMA_LDST(1, m8n8k4_load_b_f64);
20955 case NVPTX::BI__dmma_m8n8k4_ld_c:
20956 return MMA_LDST(2, m8n8k4_load_c_f64);
20957
20958 // Alternate float MMA loads
20959 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20960 return MMA_LDST(4, m16n16k16_load_a_bf16);
20961 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20962 return MMA_LDST(4, m16n16k16_load_b_bf16);
20963 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20964 return MMA_LDST(2, m8n32k16_load_a_bf16);
20965 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20966 return MMA_LDST(8, m8n32k16_load_b_bf16);
20967 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20968 return MMA_LDST(8, m32n8k16_load_a_bf16);
20969 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20970 return MMA_LDST(2, m32n8k16_load_b_bf16);
20971 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20972 return MMA_LDST(4, m16n16k8_load_a_tf32);
20973 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20974 return MMA_LDST(4, m16n16k8_load_b_tf32);
20975 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20976 return MMA_LDST(8, m16n16k8_load_c_f32);
20977
20978 // NOTE: We need to follow inconsitent naming scheme used by NVCC. Unlike
20979 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
20980 // use fragment C for both loads and stores.
20981 // FP MMA stores.
20982 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20983 return MMA_LDST(4, m16n16k16_store_d_f16);
20984 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20985 return MMA_LDST(8, m16n16k16_store_d_f32);
20986 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20987 return MMA_LDST(4, m32n8k16_store_d_f16);
20988 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20989 return MMA_LDST(8, m32n8k16_store_d_f32);
20990 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20991 return MMA_LDST(4, m8n32k16_store_d_f16);
20992 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20993 return MMA_LDST(8, m8n32k16_store_d_f32);
20994
20995 // Integer and sub-integer MMA stores.
20996 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20997 // name, integer loads/stores use LLVM's i32.
20998 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20999 return MMA_LDST(8, m16n16k16_store_d_s32);
21000 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21001 return MMA_LDST(8, m32n8k16_store_d_s32);
21002 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21003 return MMA_LDST(8, m8n32k16_store_d_s32);
21004 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21005 return MMA_LDST(2, m8n8k32_store_d_s32);
21006 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21007 return MMA_LDST(2, m8n8k128_store_d_s32);
21008
21009 // Double MMA store
21010 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21011 return MMA_LDST(2, m8n8k4_store_d_f64);
21012
21013 // Alternate float MMA store
21014 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21015 return MMA_LDST(8, m16n16k8_store_d_f32);
21016
21017 default:
21018 llvm_unreachable("Unknown MMA builtin");
21019 }
21020}
21021#undef MMA_LDST
21022#undef MMA_INTR
21023
21024
/// Fragment element counts and the intrinsic table for one MMA builtin.
///
/// Values of this type are created with positional aggregate initialization
/// in the order {NumEltsA, NumEltsB, NumEltsC, NumEltsD, Variants}, so the
/// member order is part of the contract.
struct NVPTXMmaInfo {
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;

  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  // over 'col' for layout. The index of non-satf variants is expected to match
  // the undocumented layout constants used by CUDA's mma.hpp.
  // Slots 0-3 hold the non-satf variants and slots 4-7 the satf variants;
  // a 0 entry means the combination is unsupported.
  std::array<unsigned, 8> Variants;

  /// Returns the intrinsic that matches \p Layout and \p Satf, or 0 when the
  /// combination is invalid or unsupported.
  ///
  /// \p Layout is validated on its own (0..3) rather than only checking the
  /// combined index: otherwise an out-of-range layout could alias a slot that
  /// belongs to a different layout/saturation pair (e.g. Layout -1 with satf
  /// would select slot 3, and Layout 5 without satf would select a satf slot).
  unsigned getMMAIntrinsic(int Layout, bool Satf) const {
    constexpr int NumLayouts = 4;
    if (Layout < 0 || Layout >= NumLayouts)
      return 0;
    return Variants[Layout + (Satf ? NumLayouts : 0)];
  }
};
21043
21044 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21045 // Layout and Satf, 0 otherwise.
21046static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21047 // clang-format off
21048#define MMA_VARIANTS(geom, type) \
21049 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21050 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21051 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21052 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21053#define MMA_SATF_VARIANTS(geom, type) \
21054 MMA_VARIANTS(geom, type), \
21055 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21056 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21057 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21058 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21059// Sub-integer MMA only supports row.col layout.
21060#define MMA_VARIANTS_I4(geom, type) \
21061 0, \
21062 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21063 0, \
21064 0, \
21065 0, \
21066 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21067 0, \
21068 0
21069// b1 MMA does not support .satfinite.
21070#define MMA_VARIANTS_B1_XOR(geom, type) \
21071 0, \
21072 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21073 0, \
21074 0, \
21075 0, \
21076 0, \
21077 0, \
21078 0
21079#define MMA_VARIANTS_B1_AND(geom, type) \
21080 0, \
21081 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21082 0, \
21083 0, \
21084 0, \
21085 0, \
21086 0, \
21087 0
21088 // clang-format on
21089 switch (BuiltinID) {
21090 // FP MMA
21091 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21092 // NumEltsN of return value are ordered as A,B,C,D.
21093 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21094 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21095 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21096 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21097 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21098 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21099 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21100 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21101 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21102 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21103 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21104 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21105 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21106 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21107 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21108 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21109 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21110 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21111 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21112 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21113 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21114 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21115 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21116 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21117
21118 // Integer MMA
21119 case NVPTX::BI__imma_m16n16k16_mma_s8:
21120 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21121 case NVPTX::BI__imma_m16n16k16_mma_u8:
21122 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21123 case NVPTX::BI__imma_m32n8k16_mma_s8:
21124 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21125 case NVPTX::BI__imma_m32n8k16_mma_u8:
21126 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21127 case NVPTX::BI__imma_m8n32k16_mma_s8:
21128 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21129 case NVPTX::BI__imma_m8n32k16_mma_u8:
21130 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21131
21132 // Sub-integer MMA
21133 case NVPTX::BI__imma_m8n8k32_mma_s4:
21134 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21135 case NVPTX::BI__imma_m8n8k32_mma_u4:
21136 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21137 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21138 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21139 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21140 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21141
21142 // Double MMA
21143 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21144 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21145
21146 // Alternate FP MMA
21147 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21148 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21149 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21150 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21151 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21152 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21153 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21154 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21155 default:
21156 llvm_unreachable("Unexpected builtin ID.");
21157 }
21158#undef MMA_VARIANTS
21159#undef MMA_SATF_VARIANTS
21160#undef MMA_VARIANTS_I4
21161#undef MMA_VARIANTS_B1_AND
21162#undef MMA_VARIANTS_B1_XOR
21163}
21164
21165static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21166 const CallExpr *E) {
21167 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21168 QualType ArgType = E->getArg(0)->getType();
21170 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21171 return CGF.Builder.CreateCall(
21172 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21173 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21174}
21175
21176static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21177 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21178 QualType ArgType = E->getArg(0)->getType();
21180 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21181
21182 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21183 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21184 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21185 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21186 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21187
21188 return LD;
21189}
21190
21191static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21192 const CallExpr *E) {
21193 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21194 llvm::Type *ElemTy =
21195 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21196 return CGF.Builder.CreateCall(
21197 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21198 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21199}
21200
21201static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21202 CodeGenFunction &CGF, const CallExpr *E,
21203 int SrcSize) {
21204 return E->getNumArgs() == 3
21205 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21206 {CGF.EmitScalarExpr(E->getArg(0)),
21207 CGF.EmitScalarExpr(E->getArg(1)),
21208 CGF.EmitScalarExpr(E->getArg(2))})
21209 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21210 {CGF.EmitScalarExpr(E->getArg(0)),
21211 CGF.EmitScalarExpr(E->getArg(1))});
21212}
21213
21214static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21215 const CallExpr *E, CodeGenFunction &CGF) {
21216 auto &C = CGF.CGM.getContext();
21217 if (!(C.getLangOpts().NativeHalfType ||
21218 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21219 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21220 " requires native half type support.");
21221 return nullptr;
21222 }
21223
21224 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21225 return MakeLdg(CGF, E);
21226
21227 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21228 return MakeLdu(IntrinsicID, CGF, E);
21229
21231 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21232 auto *FTy = F->getFunctionType();
21233 unsigned ICEArguments = 0;
21235 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21236 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21237 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21238 assert((ICEArguments & (1 << i)) == 0);
21239 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21240 auto *PTy = FTy->getParamType(i);
21241 if (PTy != ArgValue->getType())
21242 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21243 Args.push_back(ArgValue);
21244 }
21245
21246 return CGF.Builder.CreateCall(F, Args);
21247}
21248} // namespace
21249
21251 const CallExpr *E) {
21252 switch (BuiltinID) {
21253 case NVPTX::BI__nvvm_atom_add_gen_i:
21254 case NVPTX::BI__nvvm_atom_add_gen_l:
21255 case NVPTX::BI__nvvm_atom_add_gen_ll:
21256 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21257
21258 case NVPTX::BI__nvvm_atom_sub_gen_i:
21259 case NVPTX::BI__nvvm_atom_sub_gen_l:
21260 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21261 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21262
21263 case NVPTX::BI__nvvm_atom_and_gen_i:
21264 case NVPTX::BI__nvvm_atom_and_gen_l:
21265 case NVPTX::BI__nvvm_atom_and_gen_ll:
21266 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21267
21268 case NVPTX::BI__nvvm_atom_or_gen_i:
21269 case NVPTX::BI__nvvm_atom_or_gen_l:
21270 case NVPTX::BI__nvvm_atom_or_gen_ll:
21271 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21272
21273 case NVPTX::BI__nvvm_atom_xor_gen_i:
21274 case NVPTX::BI__nvvm_atom_xor_gen_l:
21275 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21276 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21277
21278 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21279 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21280 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21281 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21282
21283 case NVPTX::BI__nvvm_atom_max_gen_i:
21284 case NVPTX::BI__nvvm_atom_max_gen_l:
21285 case NVPTX::BI__nvvm_atom_max_gen_ll:
21286 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21287
21288 case NVPTX::BI__nvvm_atom_max_gen_ui:
21289 case NVPTX::BI__nvvm_atom_max_gen_ul:
21290 case NVPTX::BI__nvvm_atom_max_gen_ull:
21291 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21292
21293 case NVPTX::BI__nvvm_atom_min_gen_i:
21294 case NVPTX::BI__nvvm_atom_min_gen_l:
21295 case NVPTX::BI__nvvm_atom_min_gen_ll:
21296 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21297
21298 case NVPTX::BI__nvvm_atom_min_gen_ui:
21299 case NVPTX::BI__nvvm_atom_min_gen_ul:
21300 case NVPTX::BI__nvvm_atom_min_gen_ull:
21301 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21302
21303 case NVPTX::BI__nvvm_atom_cas_gen_us:
21304 case NVPTX::BI__nvvm_atom_cas_gen_i:
21305 case NVPTX::BI__nvvm_atom_cas_gen_l:
21306 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21307 // __nvvm_atom_cas_gen_* should return the old value rather than the
21308 // success flag.
21309 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21310
21311 case NVPTX::BI__nvvm_atom_add_gen_f:
21312 case NVPTX::BI__nvvm_atom_add_gen_d: {
21313 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21314 Value *Val = EmitScalarExpr(E->getArg(1));
21315
21316 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21317 AtomicOrdering::SequentiallyConsistent);
21318 }
21319
21320 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21321 Value *Ptr = EmitScalarExpr(E->getArg(0));
21322 Value *Val = EmitScalarExpr(E->getArg(1));
21323 Function *FnALI32 =
21324 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21325 return Builder.CreateCall(FnALI32, {Ptr, Val});
21326 }
21327
21328 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21329 Value *Ptr = EmitScalarExpr(E->getArg(0));
21330 Value *Val = EmitScalarExpr(E->getArg(1));
21331 Function *FnALD32 =
21332 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21333 return Builder.CreateCall(FnALD32, {Ptr, Val});
21334 }
21335
21336 case NVPTX::BI__nvvm_ldg_c:
21337 case NVPTX::BI__nvvm_ldg_sc:
21338 case NVPTX::BI__nvvm_ldg_c2:
21339 case NVPTX::BI__nvvm_ldg_sc2:
21340 case NVPTX::BI__nvvm_ldg_c4:
21341 case NVPTX::BI__nvvm_ldg_sc4:
21342 case NVPTX::BI__nvvm_ldg_s:
21343 case NVPTX::BI__nvvm_ldg_s2:
21344 case NVPTX::BI__nvvm_ldg_s4:
21345 case NVPTX::BI__nvvm_ldg_i:
21346 case NVPTX::BI__nvvm_ldg_i2:
21347 case NVPTX::BI__nvvm_ldg_i4:
21348 case NVPTX::BI__nvvm_ldg_l:
21349 case NVPTX::BI__nvvm_ldg_l2:
21350 case NVPTX::BI__nvvm_ldg_ll:
21351 case NVPTX::BI__nvvm_ldg_ll2:
21352 case NVPTX::BI__nvvm_ldg_uc:
21353 case NVPTX::BI__nvvm_ldg_uc2:
21354 case NVPTX::BI__nvvm_ldg_uc4:
21355 case NVPTX::BI__nvvm_ldg_us:
21356 case NVPTX::BI__nvvm_ldg_us2:
21357 case NVPTX::BI__nvvm_ldg_us4:
21358 case NVPTX::BI__nvvm_ldg_ui:
21359 case NVPTX::BI__nvvm_ldg_ui2:
21360 case NVPTX::BI__nvvm_ldg_ui4:
21361 case NVPTX::BI__nvvm_ldg_ul:
21362 case NVPTX::BI__nvvm_ldg_ul2:
21363 case NVPTX::BI__nvvm_ldg_ull:
21364 case NVPTX::BI__nvvm_ldg_ull2:
21365 case NVPTX::BI__nvvm_ldg_f:
21366 case NVPTX::BI__nvvm_ldg_f2:
21367 case NVPTX::BI__nvvm_ldg_f4:
21368 case NVPTX::BI__nvvm_ldg_d:
21369 case NVPTX::BI__nvvm_ldg_d2:
21370 // PTX Interoperability section 2.2: "For a vector with an even number of
21371 // elements, its alignment is set to number of elements times the alignment
21372 // of its member: n*alignof(t)."
21373 return MakeLdg(*this, E);
21374
21375 case NVPTX::BI__nvvm_ldu_c:
21376 case NVPTX::BI__nvvm_ldu_sc:
21377 case NVPTX::BI__nvvm_ldu_c2:
21378 case NVPTX::BI__nvvm_ldu_sc2:
21379 case NVPTX::BI__nvvm_ldu_c4:
21380 case NVPTX::BI__nvvm_ldu_sc4:
21381 case NVPTX::BI__nvvm_ldu_s:
21382 case NVPTX::BI__nvvm_ldu_s2:
21383 case NVPTX::BI__nvvm_ldu_s4:
21384 case NVPTX::BI__nvvm_ldu_i:
21385 case NVPTX::BI__nvvm_ldu_i2:
21386 case NVPTX::BI__nvvm_ldu_i4:
21387 case NVPTX::BI__nvvm_ldu_l:
21388 case NVPTX::BI__nvvm_ldu_l2:
21389 case NVPTX::BI__nvvm_ldu_ll:
21390 case NVPTX::BI__nvvm_ldu_ll2:
21391 case NVPTX::BI__nvvm_ldu_uc:
21392 case NVPTX::BI__nvvm_ldu_uc2:
21393 case NVPTX::BI__nvvm_ldu_uc4:
21394 case NVPTX::BI__nvvm_ldu_us:
21395 case NVPTX::BI__nvvm_ldu_us2:
21396 case NVPTX::BI__nvvm_ldu_us4:
21397 case NVPTX::BI__nvvm_ldu_ui:
21398 case NVPTX::BI__nvvm_ldu_ui2:
21399 case NVPTX::BI__nvvm_ldu_ui4:
21400 case NVPTX::BI__nvvm_ldu_ul:
21401 case NVPTX::BI__nvvm_ldu_ul2:
21402 case NVPTX::BI__nvvm_ldu_ull:
21403 case NVPTX::BI__nvvm_ldu_ull2:
21404 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21405 case NVPTX::BI__nvvm_ldu_f:
21406 case NVPTX::BI__nvvm_ldu_f2:
21407 case NVPTX::BI__nvvm_ldu_f4:
21408 case NVPTX::BI__nvvm_ldu_d:
21409 case NVPTX::BI__nvvm_ldu_d2:
21410 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21411
21412 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21413 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21414 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21415 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21416 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21417 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21418 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21420 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21421 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21422 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21423 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21424 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21425 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21426 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21427 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21428 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21429 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21430 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21431 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21432 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21433 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21434 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21435 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21436 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21437 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21438 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21439 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21440 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21441 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21442 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21443 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21444 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21445 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21446 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21447 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21448 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21449 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21450 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21451 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21452 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21453 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21454 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21455 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21456 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21457 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21458 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21459 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21460 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21461 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21462 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21463 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21464 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21465 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21466 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21467 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21468 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21469 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21470 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21471 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21472 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21473 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21474 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21475 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21476 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21477 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21478 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21479 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21480 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21481 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21482 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21483 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21484 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21485 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21486 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21487 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21488 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21489 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21490 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21491 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21492 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21493 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21494 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21495 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21496 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21497 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21498 Value *Ptr = EmitScalarExpr(E->getArg(0));
21499 llvm::Type *ElemTy =
21500 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21501 return Builder.CreateCall(
21503 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21504 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21505 }
21506 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21507 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21508 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21509 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21510 Value *Ptr = EmitScalarExpr(E->getArg(0));
21511 llvm::Type *ElemTy =
21512 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21513 return Builder.CreateCall(
21515 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21516 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21517 }
21518 case NVPTX::BI__nvvm_match_all_sync_i32p:
21519 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21520 Value *Mask = EmitScalarExpr(E->getArg(0));
21521 Value *Val = EmitScalarExpr(E->getArg(1));
21522 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21523 Value *ResultPair = Builder.CreateCall(
21524 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21525 ? Intrinsic::nvvm_match_all_sync_i32p
21526 : Intrinsic::nvvm_match_all_sync_i64p),
21527 {Mask, Val});
21528 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21529 PredOutPtr.getElementType());
21530 Builder.CreateStore(Pred, PredOutPtr);
21531 return Builder.CreateExtractValue(ResultPair, 0);
21532 }
21533
21534 // FP MMA loads
21535 case NVPTX::BI__hmma_m16n16k16_ld_a:
21536 case NVPTX::BI__hmma_m16n16k16_ld_b:
21537 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21538 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21539 case NVPTX::BI__hmma_m32n8k16_ld_a:
21540 case NVPTX::BI__hmma_m32n8k16_ld_b:
21541 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21542 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21543 case NVPTX::BI__hmma_m8n32k16_ld_a:
21544 case NVPTX::BI__hmma_m8n32k16_ld_b:
21545 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21546 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21547 // Integer MMA loads.
21548 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21549 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21550 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21551 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21552 case NVPTX::BI__imma_m16n16k16_ld_c:
21553 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21554 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21555 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21556 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21557 case NVPTX::BI__imma_m32n8k16_ld_c:
21558 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21559 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21560 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21561 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21562 case NVPTX::BI__imma_m8n32k16_ld_c:
21563 // Sub-integer MMA loads.
21564 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21565 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21566 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21567 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21568 case NVPTX::BI__imma_m8n8k32_ld_c:
21569 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21570 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21571 case NVPTX::BI__bmma_m8n8k128_ld_c:
21572 // Double MMA loads.
21573 case NVPTX::BI__dmma_m8n8k4_ld_a:
21574 case NVPTX::BI__dmma_m8n8k4_ld_b:
21575 case NVPTX::BI__dmma_m8n8k4_ld_c:
21576 // Alternate float MMA loads.
21577 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21578 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21579 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21580 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21581 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21582 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21583 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21584 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21585 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21586 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21587 Value *Src = EmitScalarExpr(E->getArg(1));
21588 Value *Ldm = EmitScalarExpr(E->getArg(2));
21589 std::optional<llvm::APSInt> isColMajorArg =
21590 E->getArg(3)->getIntegerConstantExpr(getContext());
21591 if (!isColMajorArg)
21592 return nullptr;
21593 bool isColMajor = isColMajorArg->getSExtValue();
21594 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21595 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21596 if (IID == 0)
21597 return nullptr;
21598
21599 Value *Result =
21600 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21601
21602 // Save returned values.
21603 assert(II.NumResults);
21604 if (II.NumResults == 1) {
21607 } else {
21608 for (unsigned i = 0; i < II.NumResults; ++i) {
21610 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21611 Dst.getElementType()),
21613 llvm::ConstantInt::get(IntTy, i)),
21615 }
21616 }
21617 return Result;
21618 }
21619
21620 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21621 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21622 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21623 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21624 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21625 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21626 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21627 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21628 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21629 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21630 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21631 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21632 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21633 Value *Dst = EmitScalarExpr(E->getArg(0));
21634 Address Src = EmitPointerWithAlignment(E->getArg(1));
21635 Value *Ldm = EmitScalarExpr(E->getArg(2));
21636 std::optional<llvm::APSInt> isColMajorArg =
21637 E->getArg(3)->getIntegerConstantExpr(getContext());
21638 if (!isColMajorArg)
21639 return nullptr;
21640 bool isColMajor = isColMajorArg->getSExtValue();
21641 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21642 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21643 if (IID == 0)
21644 return nullptr;
21645 Function *Intrinsic =
21646 CGM.getIntrinsic(IID, Dst->getType());
21647 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21648 SmallVector<Value *, 10> Values = {Dst};
21649 for (unsigned i = 0; i < II.NumResults; ++i) {
21651 Src.getElementType(),
21653 llvm::ConstantInt::get(IntTy, i)),
21655 Values.push_back(Builder.CreateBitCast(V, ParamType));
21656 }
21657 Values.push_back(Ldm);
21658 Value *Result = Builder.CreateCall(Intrinsic, Values);
21659 return Result;
21660 }
21661
21662 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21663 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21664 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21665 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21666 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21667 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21668 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21669 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21670 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21671 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21672 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21673 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21674 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21675 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21676 case NVPTX::BI__imma_m16n16k16_mma_s8:
21677 case NVPTX::BI__imma_m16n16k16_mma_u8:
21678 case NVPTX::BI__imma_m32n8k16_mma_s8:
21679 case NVPTX::BI__imma_m32n8k16_mma_u8:
21680 case NVPTX::BI__imma_m8n32k16_mma_s8:
21681 case NVPTX::BI__imma_m8n32k16_mma_u8:
21682 case NVPTX::BI__imma_m8n8k32_mma_s4:
21683 case NVPTX::BI__imma_m8n8k32_mma_u4:
21684 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21685 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21686 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21687 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21688 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21689 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21690 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21691 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21692 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21693 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21694 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21695 std::optional<llvm::APSInt> LayoutArg =
21696 E->getArg(4)->getIntegerConstantExpr(getContext());
21697 if (!LayoutArg)
21698 return nullptr;
21699 int Layout = LayoutArg->getSExtValue();
21700 if (Layout < 0 || Layout > 3)
21701 return nullptr;
21702 llvm::APSInt SatfArg;
21703 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21704 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21705 SatfArg = 0; // .b1 does not have satf argument.
21706 else if (std::optional<llvm::APSInt> OptSatfArg =
21707 E->getArg(5)->getIntegerConstantExpr(getContext()))
21708 SatfArg = *OptSatfArg;
21709 else
21710 return nullptr;
21711 bool Satf = SatfArg.getSExtValue();
21712 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21713 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21714 if (IID == 0) // Unsupported combination of Layout/Satf.
21715 return nullptr;
21716
21718 Function *Intrinsic = CGM.getIntrinsic(IID);
21719 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21720 // Load A
21721 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21723 SrcA.getElementType(),
21724 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21725 llvm::ConstantInt::get(IntTy, i)),
21727 Values.push_back(Builder.CreateBitCast(V, AType));
21728 }
21729 // Load B
21730 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21731 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21733 SrcB.getElementType(),
21734 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21735 llvm::ConstantInt::get(IntTy, i)),
21737 Values.push_back(Builder.CreateBitCast(V, BType));
21738 }
21739 // Load C
21740 llvm::Type *CType =
21741 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21742 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21744 SrcC.getElementType(),
21745 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21746 llvm::ConstantInt::get(IntTy, i)),
21748 Values.push_back(Builder.CreateBitCast(V, CType));
21749 }
21750 Value *Result = Builder.CreateCall(Intrinsic, Values);
21751 llvm::Type *DType = Dst.getElementType();
21752 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21754 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21756 llvm::ConstantInt::get(IntTy, i)),
21758 return Result;
21759 }
21760 // The following builtins require half type support
21761 case NVPTX::BI__nvvm_ex2_approx_f16:
21762 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21763 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21764 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21765 case NVPTX::BI__nvvm_ff2f16x2_rn:
21766 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21767 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21768 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21769 case NVPTX::BI__nvvm_ff2f16x2_rz:
21770 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21771 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21772 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21773 case NVPTX::BI__nvvm_fma_rn_f16:
21774 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21775 case NVPTX::BI__nvvm_fma_rn_f16x2:
21776 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21777 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21778 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21779 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21780 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21781 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21782 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21783 *this);
21784 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21785 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21786 *this);
21787 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21788 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21789 *this);
21790 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21791 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21792 *this);
21793 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21794 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21795 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21796 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21797 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21798 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21799 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21800 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21801 case NVPTX::BI__nvvm_fmax_f16:
21802 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21803 case NVPTX::BI__nvvm_fmax_f16x2:
21804 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21805 case NVPTX::BI__nvvm_fmax_ftz_f16:
21806 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21807 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21808 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21809 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21810 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21811 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21812 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21813 *this);
21814 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21815 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21816 E, *this);
21817 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21818 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21819 BuiltinID, E, *this);
21820 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21821 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21822 *this);
21823 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21824 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21825 E, *this);
21826 case NVPTX::BI__nvvm_fmax_nan_f16:
21827 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21828 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21829 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21830 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21831 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21832 *this);
21833 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21834 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21835 E, *this);
21836 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21837 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21838 *this);
21839 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21840 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21841 *this);
21842 case NVPTX::BI__nvvm_fmin_f16:
21843 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21844 case NVPTX::BI__nvvm_fmin_f16x2:
21845 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21846 case NVPTX::BI__nvvm_fmin_ftz_f16:
21847 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21848 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21849 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21850 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21851 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21852 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21853 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21854 *this);
21855 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21856 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21857 E, *this);
21858 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21859 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21860 BuiltinID, E, *this);
21861 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21862 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
21863 *this);
21864 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21865 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21866 E, *this);
21867 case NVPTX::BI__nvvm_fmin_nan_f16:
21868 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
21869 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21870 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
21871 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21872 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
21873 *this);
21874 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21875 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21876 E, *this);
21877 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21878 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
21879 *this);
21880 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21881 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
21882 *this);
21883 case NVPTX::BI__nvvm_ldg_h:
21884 case NVPTX::BI__nvvm_ldg_h2:
21885 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
21886 case NVPTX::BI__nvvm_ldu_h:
21887 case NVPTX::BI__nvvm_ldu_h2:
21888 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21889 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21890 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21891 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
21892 4);
21893 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21894 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21895 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
21896 8);
21897 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21898 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21899 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
21900 16);
21901 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21902 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21903 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
21904 16);
21905 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21906 return Builder.CreateCall(
21907 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
21908 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21909 return Builder.CreateCall(
21910 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
21911 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21912 return Builder.CreateCall(
21913 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
21914 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21915 return Builder.CreateCall(
21916 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
21917 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21918 return Builder.CreateCall(
21919 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
21920 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21921 return Builder.CreateCall(
21922 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
21923 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21924 return Builder.CreateCall(
21925 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
21926 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21927 return Builder.CreateCall(
21928 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
21929 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
21930 return Builder.CreateCall(
21931 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
21932 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
21933 return Builder.CreateCall(
21934 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
21935 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
21936 return Builder.CreateCall(
21937 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
21938 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
21939 return Builder.CreateCall(
21940 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
21941 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
21942 return Builder.CreateCall(
21943 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
21944 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
21945 return Builder.CreateCall(
21946 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
21947 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
21948 return Builder.CreateCall(
21949 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
21950 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
21951 return Builder.CreateCall(
21952 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
21953 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
21954 return Builder.CreateCall(
21955 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
21956 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
21957 return Builder.CreateCall(
21958 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
21959 case NVPTX::BI__nvvm_is_explicit_cluster:
21960 return Builder.CreateCall(
21961 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
21962 case NVPTX::BI__nvvm_isspacep_shared_cluster:
21963 return Builder.CreateCall(
21964 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
21965 EmitScalarExpr(E->getArg(0)));
21966 case NVPTX::BI__nvvm_mapa:
21967 return Builder.CreateCall(
21968 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
21969 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21970 case NVPTX::BI__nvvm_mapa_shared_cluster:
21971 return Builder.CreateCall(
21972 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
21973 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21974 case NVPTX::BI__nvvm_getctarank:
21975 return Builder.CreateCall(
21976 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
21977 EmitScalarExpr(E->getArg(0)));
21978 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21979 return Builder.CreateCall(
21980 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
21981 EmitScalarExpr(E->getArg(0)));
21982 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21983 return Builder.CreateCall(
21984 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
21985 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21986 return Builder.CreateCall(
21987 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
21988 case NVPTX::BI__nvvm_barrier_cluster_wait:
21989 return Builder.CreateCall(
21990 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
21991 case NVPTX::BI__nvvm_fence_sc_cluster:
21992 return Builder.CreateCall(
21993 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
21994 default:
21995 return nullptr;
21996 }
21997}
21998
21999namespace {
22000struct BuiltinAlignArgs {
22001 llvm::Value *Src = nullptr;
22002 llvm::Type *SrcType = nullptr;
22003 llvm::Value *Alignment = nullptr;
22004 llvm::Value *Mask = nullptr;
22005 llvm::IntegerType *IntType = nullptr;
22006
22007 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22008 QualType AstType = E->getArg(0)->getType();
22009 if (AstType->isArrayType())
22010 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22011 else
22012 Src = CGF.EmitScalarExpr(E->getArg(0));
22013 SrcType = Src->getType();
22014 if (SrcType->isPointerTy()) {
22015 IntType = IntegerType::get(
22016 CGF.getLLVMContext(),
22017 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22018 } else {
22019 assert(SrcType->isIntegerTy());
22020 IntType = cast<llvm::IntegerType>(SrcType);
22021 }
22022 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22023 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22024 auto *One = llvm::ConstantInt::get(IntType, 1);
22025 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22026 }
22027};
22028} // namespace
22029
22030/// Generate (x & (y-1)) == 0.
/// x is call argument 0 and (y-1) is the mask, both as prepared by
/// BuiltinAlignArgs. The comparison result is wrapped in an RValue.
// NOTE(review): listing line 22031, which should carry this function's
// signature and opening brace, is absent from this extract -- restore it from
// the upstream file before building.
22032 BuiltinAlignArgs Args(E, *this);
22033 llvm::Value *SrcAddress = Args.Src;
// A pointer source is first cast to the integer type so it can be AND-ed with
// the mask; an integer source is used as is.
22034 if (Args.SrcType->isPointerTy())
22035 SrcAddress =
22036 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
// The value is aligned when none of the bits selected by the mask are set.
22037 return RValue::get(Builder.CreateICmpEQ(
22038 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22039 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22040}
22041
22042/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22043/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22044/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
/// x is call argument 0 and (y-1) is the mask, both as prepared by
/// BuiltinAlignArgs. AlignUp selects the align-up form; the result has the
/// same LLVM type as the source operand and is wrapped in an RValue.
// NOTE(review): listing line 22045, which should carry this function's
// signature and opening brace, is absent from this extract -- restore it from
// the upstream file before building.
22046 BuiltinAlignArgs Args(E, *this);
22047 llvm::Value *SrcForMask = Args.Src;
22048 if (AlignUp) {
22049 // When aligning up we have to first add the mask to ensure we go over the
22050 // next alignment value and then align down to the next valid multiple.
22051 // By adding the mask, we ensure that align_up on an already aligned
22052 // value will not change the value.
22053 if (Args.Src->getType()->isPointerTy()) {
// Pointer source: step the pointer forward by Mask using a GEP over Int8Ty.
// A plain GEP is emitted when signed overflow is defined by the language
// options; otherwise the checked in-bounds variant is used.
22054 if (getLangOpts().isSignedOverflowDefined())
22055 SrcForMask =
22056 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22057 else
22058 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22059 /*SignedIndices=*/true,
22060 /*isSubtraction=*/false,
22061 E->getExprLoc(), "over_boundary");
22062 } else {
// Integer source: an ordinary add is sufficient.
22063 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22064 }
22065 }
22066 // Invert the mask to only clear the lower bits.
22067 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22068 llvm::Value *Result = nullptr;
22069 if (Args.Src->getType()->isPointerTy()) {
// Pointer source: mask through the ptrmask intrinsic so the value stays a
// pointer.
22070 Result = Builder.CreateIntrinsic(
22071 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22072 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22073 } else {
22074 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22075 }
// Masking must not have changed the type of the value.
22076 assert(Result->getType() == Args.SrcType);
22077 return RValue::get(Result);
22078}
22079
22081 const CallExpr *E) {
22082 switch (BuiltinID) {
22083 case WebAssembly::BI__builtin_wasm_memory_size: {
22084 llvm::Type *ResultType = ConvertType(E->getType());
22085 Value *I = EmitScalarExpr(E->getArg(0));
22086 Function *Callee =
22087 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22088 return Builder.CreateCall(Callee, I);
22089 }
22090 case WebAssembly::BI__builtin_wasm_memory_grow: {
22091 llvm::Type *ResultType = ConvertType(E->getType());
22092 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22093 EmitScalarExpr(E->getArg(1))};
22094 Function *Callee =
22095 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22096 return Builder.CreateCall(Callee, Args);
22097 }
22098 case WebAssembly::BI__builtin_wasm_tls_size: {
22099 llvm::Type *ResultType = ConvertType(E->getType());
22100 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22101 return Builder.CreateCall(Callee);
22102 }
22103 case WebAssembly::BI__builtin_wasm_tls_align: {
22104 llvm::Type *ResultType = ConvertType(E->getType());
22105 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22106 return Builder.CreateCall(Callee);
22107 }
22108 case WebAssembly::BI__builtin_wasm_tls_base: {
22109 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22110 return Builder.CreateCall(Callee);
22111 }
22112 case WebAssembly::BI__builtin_wasm_throw: {
22113 Value *Tag = EmitScalarExpr(E->getArg(0));
22114 Value *Obj = EmitScalarExpr(E->getArg(1));
22115 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22116 return Builder.CreateCall(Callee, {Tag, Obj});
22117 }
22118 case WebAssembly::BI__builtin_wasm_rethrow: {
22119 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22120 return Builder.CreateCall(Callee);
22121 }
22122 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22123 Value *Addr = EmitScalarExpr(E->getArg(0));
22124 Value *Expected = EmitScalarExpr(E->getArg(1));
22125 Value *Timeout = EmitScalarExpr(E->getArg(2));
22126 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22127 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22128 }
22129 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22130 Value *Addr = EmitScalarExpr(E->getArg(0));
22131 Value *Expected = EmitScalarExpr(E->getArg(1));
22132 Value *Timeout = EmitScalarExpr(E->getArg(2));
22133 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22134 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22135 }
22136 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22137 Value *Addr = EmitScalarExpr(E->getArg(0));
22138 Value *Count = EmitScalarExpr(E->getArg(1));
22139 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22140 return Builder.CreateCall(Callee, {Addr, Count});
22141 }
22142 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22143 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22144 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22145 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22146 Value *Src = EmitScalarExpr(E->getArg(0));
22147 llvm::Type *ResT = ConvertType(E->getType());
22148 Function *Callee =
22149 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22150 return Builder.CreateCall(Callee, {Src});
22151 }
22152 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22153 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22154 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22155 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22156 Value *Src = EmitScalarExpr(E->getArg(0));
22157 llvm::Type *ResT = ConvertType(E->getType());
22158 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22159 {ResT, Src->getType()});
22160 return Builder.CreateCall(Callee, {Src});
22161 }
22162 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22163 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22164 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22165 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22166 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22167 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22168 Value *Src = EmitScalarExpr(E->getArg(0));
22169 llvm::Type *ResT = ConvertType(E->getType());
22170 Function *Callee =
22171 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22172 return Builder.CreateCall(Callee, {Src});
22173 }
22174 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22175 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22176 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22177 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22178 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22179 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22180 Value *Src = EmitScalarExpr(E->getArg(0));
22181 llvm::Type *ResT = ConvertType(E->getType());
22182 Function *Callee =
22183 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22184 return Builder.CreateCall(Callee, {Src});
22185 }
22186 case WebAssembly::BI__builtin_wasm_min_f32:
22187 case WebAssembly::BI__builtin_wasm_min_f64:
22188 case WebAssembly::BI__builtin_wasm_min_f16x8:
22189 case WebAssembly::BI__builtin_wasm_min_f32x4:
22190 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22191 Value *LHS = EmitScalarExpr(E->getArg(0));
22192 Value *RHS = EmitScalarExpr(E->getArg(1));
22193 Function *Callee =
22194 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22195 return Builder.CreateCall(Callee, {LHS, RHS});
22196 }
22197 case WebAssembly::BI__builtin_wasm_max_f32:
22198 case WebAssembly::BI__builtin_wasm_max_f64:
22199 case WebAssembly::BI__builtin_wasm_max_f16x8:
22200 case WebAssembly::BI__builtin_wasm_max_f32x4:
22201 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22202 Value *LHS = EmitScalarExpr(E->getArg(0));
22203 Value *RHS = EmitScalarExpr(E->getArg(1));
22204 Function *Callee =
22205 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22206 return Builder.CreateCall(Callee, {LHS, RHS});
22207 }
22208 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22209 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22210 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22211 Value *LHS = EmitScalarExpr(E->getArg(0));
22212 Value *RHS = EmitScalarExpr(E->getArg(1));
22213 Function *Callee =
22214 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22215 return Builder.CreateCall(Callee, {LHS, RHS});
22216 }
22217 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22218 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22219 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22220 Value *LHS = EmitScalarExpr(E->getArg(0));
22221 Value *RHS = EmitScalarExpr(E->getArg(1));
22222 Function *Callee =
22223 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22224 return Builder.CreateCall(Callee, {LHS, RHS});
22225 }
22226 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22227 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22228 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22229 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22230 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22231 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22232 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22233 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22234 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22235 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22236 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22237 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22238 unsigned IntNo;
22239 switch (BuiltinID) {
22240 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22241 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22242 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22243 IntNo = Intrinsic::ceil;
22244 break;
22245 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22246 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22247 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22248 IntNo = Intrinsic::floor;
22249 break;
22250 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22251 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22252 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22253 IntNo = Intrinsic::trunc;
22254 break;
22255 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22256 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22257 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22258 IntNo = Intrinsic::nearbyint;
22259 break;
22260 default:
22261 llvm_unreachable("unexpected builtin ID");
22262 }
22263 Value *Value = EmitScalarExpr(E->getArg(0));
22265 return Builder.CreateCall(Callee, Value);
22266 }
22267 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22268 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22269 return Builder.CreateCall(Callee);
22270 }
22271 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22272 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22273 return Builder.CreateCall(Callee);
22274 }
22275 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22276 Value *Src = EmitScalarExpr(E->getArg(0));
22277 Value *Indices = EmitScalarExpr(E->getArg(1));
22278 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22279 return Builder.CreateCall(Callee, {Src, Indices});
22280 }
22281 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22282 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22283 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22284 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22285 Value *Vec = EmitScalarExpr(E->getArg(0));
22286 Value *Neg = Builder.CreateNeg(Vec, "neg");
22287 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22288 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22289 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22290 }
22291 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22292 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22293 Value *LHS = EmitScalarExpr(E->getArg(0));
22294 Value *RHS = EmitScalarExpr(E->getArg(1));
22295 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22296 ConvertType(E->getType()));
22297 return Builder.CreateCall(Callee, {LHS, RHS});
22298 }
22299 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22300 Value *LHS = EmitScalarExpr(E->getArg(0));
22301 Value *RHS = EmitScalarExpr(E->getArg(1));
22302 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22303 return Builder.CreateCall(Callee, {LHS, RHS});
22304 }
22305 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22306 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22307 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22308 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22309 Value *Vec = EmitScalarExpr(E->getArg(0));
22310 unsigned IntNo;
22311 switch (BuiltinID) {
22312 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22313 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22314 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22315 break;
22316 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22317 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22318 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22319 break;
22320 default:
22321 llvm_unreachable("unexpected builtin ID");
22322 }
22323
22325 return Builder.CreateCall(Callee, Vec);
22326 }
22327 case WebAssembly::BI__builtin_wasm_bitselect: {
22328 Value *V1 = EmitScalarExpr(E->getArg(0));
22329 Value *V2 = EmitScalarExpr(E->getArg(1));
22330 Value *C = EmitScalarExpr(E->getArg(2));
22331 Function *Callee =
22332 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22333 return Builder.CreateCall(Callee, {V1, V2, C});
22334 }
22335 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22336 Value *LHS = EmitScalarExpr(E->getArg(0));
22337 Value *RHS = EmitScalarExpr(E->getArg(1));
22338 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22339 return Builder.CreateCall(Callee, {LHS, RHS});
22340 }
22341 case WebAssembly::BI__builtin_wasm_any_true_v128:
22342 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22343 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22344 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22345 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22346 unsigned IntNo;
22347 switch (BuiltinID) {
22348 case WebAssembly::BI__builtin_wasm_any_true_v128:
22349 IntNo = Intrinsic::wasm_anytrue;
22350 break;
22351 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22352 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22353 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22354 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22355 IntNo = Intrinsic::wasm_alltrue;
22356 break;
22357 default:
22358 llvm_unreachable("unexpected builtin ID");
22359 }
22360 Value *Vec = EmitScalarExpr(E->getArg(0));
22361 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22362 return Builder.CreateCall(Callee, {Vec});
22363 }
22364 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22365 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22366 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22367 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22368 Value *Vec = EmitScalarExpr(E->getArg(0));
22369 Function *Callee =
22370 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22371 return Builder.CreateCall(Callee, {Vec});
22372 }
22373 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22374 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22375 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22376 Value *Vec = EmitScalarExpr(E->getArg(0));
22377 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22378 return Builder.CreateCall(Callee, {Vec});
22379 }
22380 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22381 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22382 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22383 Value *Vec = EmitScalarExpr(E->getArg(0));
22384 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22385 return Builder.CreateCall(Callee, {Vec});
22386 }
22387 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22388 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22389 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22390 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22391 Value *Low = EmitScalarExpr(E->getArg(0));
22392 Value *High = EmitScalarExpr(E->getArg(1));
22393 unsigned IntNo;
22394 switch (BuiltinID) {
22395 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22396 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22397 IntNo = Intrinsic::wasm_narrow_signed;
22398 break;
22399 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22400 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22401 IntNo = Intrinsic::wasm_narrow_unsigned;
22402 break;
22403 default:
22404 llvm_unreachable("unexpected builtin ID");
22405 }
22406 Function *Callee =
22407 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22408 return Builder.CreateCall(Callee, {Low, High});
22409 }
22410 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22411 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22412 Value *Vec = EmitScalarExpr(E->getArg(0));
22413 unsigned IntNo;
22414 switch (BuiltinID) {
22415 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22416 IntNo = Intrinsic::fptosi_sat;
22417 break;
22418 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22419 IntNo = Intrinsic::fptoui_sat;
22420 break;
22421 default:
22422 llvm_unreachable("unexpected builtin ID");
22423 }
22424 llvm::Type *SrcT = Vec->getType();
22425 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22426 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22427 Value *Trunc = Builder.CreateCall(Callee, Vec);
22428 Value *Splat = Constant::getNullValue(TruncT);
22429 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22430 }
22431 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22432 Value *Ops[18];
22433 size_t OpIdx = 0;
22434 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22435 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22436 while (OpIdx < 18) {
22437 std::optional<llvm::APSInt> LaneConst =
22438 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22439 assert(LaneConst && "Constant arg isn't actually constant?");
22440 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22441 }
22442 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22443 return Builder.CreateCall(Callee, Ops);
22444 }
22445 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22446 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22447 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22448 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22449 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22450 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22451 Value *A = EmitScalarExpr(E->getArg(0));
22452 Value *B = EmitScalarExpr(E->getArg(1));
22453 Value *C = EmitScalarExpr(E->getArg(2));
22454 unsigned IntNo;
22455 switch (BuiltinID) {
22456 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22457 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22458 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22459 IntNo = Intrinsic::wasm_relaxed_madd;
22460 break;
22461 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22462 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22463 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22464 IntNo = Intrinsic::wasm_relaxed_nmadd;
22465 break;
22466 default:
22467 llvm_unreachable("unexpected builtin ID");
22468 }
22469 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22470 return Builder.CreateCall(Callee, {A, B, C});
22471 }
22472 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22473 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22474 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22475 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22476 Value *A = EmitScalarExpr(E->getArg(0));
22477 Value *B = EmitScalarExpr(E->getArg(1));
22478 Value *C = EmitScalarExpr(E->getArg(2));
22479 Function *Callee =
22480 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22481 return Builder.CreateCall(Callee, {A, B, C});
22482 }
22483 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22484 Value *Src = EmitScalarExpr(E->getArg(0));
22485 Value *Indices = EmitScalarExpr(E->getArg(1));
22486 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22487 return Builder.CreateCall(Callee, {Src, Indices});
22488 }
22489 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22490 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22491 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22492 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22493 Value *LHS = EmitScalarExpr(E->getArg(0));
22494 Value *RHS = EmitScalarExpr(E->getArg(1));
22495 unsigned IntNo;
22496 switch (BuiltinID) {
22497 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22498 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22499 IntNo = Intrinsic::wasm_relaxed_min;
22500 break;
22501 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22502 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22503 IntNo = Intrinsic::wasm_relaxed_max;
22504 break;
22505 default:
22506 llvm_unreachable("unexpected builtin ID");
22507 }
22508 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22509 return Builder.CreateCall(Callee, {LHS, RHS});
22510 }
22511 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22512 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22513 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22514 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22515 Value *Vec = EmitScalarExpr(E->getArg(0));
22516 unsigned IntNo;
22517 switch (BuiltinID) {
22518 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22519 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22520 break;
22521 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22522 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22523 break;
22524 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22525 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22526 break;
22527 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22528 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22529 break;
22530 default:
22531 llvm_unreachable("unexpected builtin ID");
22532 }
22533 Function *Callee = CGM.getIntrinsic(IntNo);
22534 return Builder.CreateCall(Callee, {Vec});
22535 }
22536 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22537 Value *LHS = EmitScalarExpr(E->getArg(0));
22538 Value *RHS = EmitScalarExpr(E->getArg(1));
22539 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22540 return Builder.CreateCall(Callee, {LHS, RHS});
22541 }
22542 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22543 Value *LHS = EmitScalarExpr(E->getArg(0));
22544 Value *RHS = EmitScalarExpr(E->getArg(1));
22545 Function *Callee =
22546 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22547 return Builder.CreateCall(Callee, {LHS, RHS});
22548 }
22549 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22550 Value *LHS = EmitScalarExpr(E->getArg(0));
22551 Value *RHS = EmitScalarExpr(E->getArg(1));
22552 Value *Acc = EmitScalarExpr(E->getArg(2));
22553 Function *Callee =
22554 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22555 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22556 }
22557 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22558 Value *LHS = EmitScalarExpr(E->getArg(0));
22559 Value *RHS = EmitScalarExpr(E->getArg(1));
22560 Value *Acc = EmitScalarExpr(E->getArg(2));
22561 Function *Callee =
22562 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22563 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22564 }
22565 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22566 Value *Addr = EmitScalarExpr(E->getArg(0));
22567 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22568 return Builder.CreateCall(Callee, {Addr});
22569 }
22570 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22571 Value *Val = EmitScalarExpr(E->getArg(0));
22572 Value *Addr = EmitScalarExpr(E->getArg(1));
22573 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22574 return Builder.CreateCall(Callee, {Val, Addr});
22575 }
22576 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22577 Value *Val = EmitScalarExpr(E->getArg(0));
22578 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22579 return Builder.CreateCall(Callee, {Val});
22580 }
22581 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22582 Value *Vector = EmitScalarExpr(E->getArg(0));
22583 Value *Index = EmitScalarExpr(E->getArg(1));
22584 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22585 return Builder.CreateCall(Callee, {Vector, Index});
22586 }
22587 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22588 Value *Vector = EmitScalarExpr(E->getArg(0));
22589 Value *Index = EmitScalarExpr(E->getArg(1));
22590 Value *Val = EmitScalarExpr(E->getArg(2));
22591 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22592 return Builder.CreateCall(Callee, {Vector, Index, Val});
22593 }
22594 case WebAssembly::BI__builtin_wasm_table_get: {
22595 assert(E->getArg(0)->getType()->isArrayType());
22596 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22597 Value *Index = EmitScalarExpr(E->getArg(1));
22600 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22601 else if (E->getType().isWebAssemblyFuncrefType())
22602 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22603 else
22604 llvm_unreachable(
22605 "Unexpected reference type for __builtin_wasm_table_get");
22606 return Builder.CreateCall(Callee, {Table, Index});
22607 }
22608 case WebAssembly::BI__builtin_wasm_table_set: {
22609 assert(E->getArg(0)->getType()->isArrayType());
22610 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22611 Value *Index = EmitScalarExpr(E->getArg(1));
22612 Value *Val = EmitScalarExpr(E->getArg(2));
22614 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22615 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22616 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22617 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22618 else
22619 llvm_unreachable(
22620 "Unexpected reference type for __builtin_wasm_table_set");
22621 return Builder.CreateCall(Callee, {Table, Index, Val});
22622 }
22623 case WebAssembly::BI__builtin_wasm_table_size: {
22624 assert(E->getArg(0)->getType()->isArrayType());
22625 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22626 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22627 return Builder.CreateCall(Callee, Value);
22628 }
22629 case WebAssembly::BI__builtin_wasm_table_grow: {
22630 assert(E->getArg(0)->getType()->isArrayType());
22631 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22632 Value *Val = EmitScalarExpr(E->getArg(1));
22633 Value *NElems = EmitScalarExpr(E->getArg(2));
22634
22636 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22637 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22638 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22639 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22640 else
22641 llvm_unreachable(
22642 "Unexpected reference type for __builtin_wasm_table_grow");
22643
22644 return Builder.CreateCall(Callee, {Table, Val, NElems});
22645 }
22646 case WebAssembly::BI__builtin_wasm_table_fill: {
22647 assert(E->getArg(0)->getType()->isArrayType());
22648 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22649 Value *Index = EmitScalarExpr(E->getArg(1));
22650 Value *Val = EmitScalarExpr(E->getArg(2));
22651 Value *NElems = EmitScalarExpr(E->getArg(3));
22652
22654 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22655 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22656 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22657 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22658 else
22659 llvm_unreachable(
22660 "Unexpected reference type for __builtin_wasm_table_fill");
22661
22662 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22663 }
22664 case WebAssembly::BI__builtin_wasm_table_copy: {
22665 assert(E->getArg(0)->getType()->isArrayType());
22666 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22667 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22668 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22669 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22670 Value *NElems = EmitScalarExpr(E->getArg(4));
22671
22672 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22673
22674 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22675 }
22676 default:
22677 return nullptr;
22678 }
22679}
22680
22681static std::pair<Intrinsic::ID, unsigned>
22683 struct Info {
22684 unsigned BuiltinID;
22685 Intrinsic::ID IntrinsicID;
22686 unsigned VecLen;
22687 };
22688 static Info Infos[] = {
22689#define CUSTOM_BUILTIN_MAPPING(x,s) \
22690 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22691 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22692 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22693 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22694 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22695 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22696 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22697 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22698 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22699 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22700 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22701 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22702 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22703 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22704 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22705 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22706 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22707 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22708 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22709 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22710 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22711 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22712 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22713 // Legacy builtins that take a vector in place of a vector predicate.
22714 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22715 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22716 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22717 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22718 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22719 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22720 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22721 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22722#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22723#undef CUSTOM_BUILTIN_MAPPING
22724 };
22725
22726 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22727 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22728 (void)SortOnce;
22729
22730 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22731 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22732 return {Intrinsic::not_intrinsic, 0};
22733
22734 return {F->IntrinsicID, F->VecLen};
22735}
22736
22738 const CallExpr *E) {
22739 Intrinsic::ID ID;
22740 unsigned VecLen;
22741 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22742
22743 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22744 // The base pointer is passed by address, so it needs to be loaded.
22745 Address A = EmitPointerWithAlignment(E->getArg(0));
22747 llvm::Value *Base = Builder.CreateLoad(BP);
22748 // The treatment of both loads and stores is the same: the arguments for
22749 // the builtin are the same as the arguments for the intrinsic.
22750 // Load:
22751 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22752 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22753 // Store:
22754 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22755 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22757 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22758 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22759
22760 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22761 // The load intrinsics generate two results (Value, NewBase), stores
22762 // generate one (NewBase). The new base address needs to be stored.
22763 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22764 : Result;
22765 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22766 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22767 llvm::Value *RetVal =
22768 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22769 if (IsLoad)
22770 RetVal = Builder.CreateExtractValue(Result, 0);
22771 return RetVal;
22772 };
22773
22774 // Handle the conversion of bit-reverse load intrinsics to bit code.
22775 // The intrinsic call after this function only reads from memory and the
22776 // write to memory is dealt by the store instruction.
22777 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22778 // The intrinsic generates one result, which is the new value for the base
22779 // pointer. It needs to be returned. The result of the load instruction is
22780 // passed to intrinsic by address, so the value needs to be stored.
22781 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22782
22783 // Expressions like &(*pt++) will be incremented per evaluation.
22784 // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
22785 // per call.
22786 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22787 DestAddr = DestAddr.withElementType(Int8Ty);
22788 llvm::Value *DestAddress = DestAddr.