1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <numeric>
72#include <optional>
73#include <utility>
74
75using namespace clang;
76using namespace CodeGen;
77using namespace llvm;
78
79static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
80 Align AlignmentInBytes) {
81 ConstantInt *Byte;
82 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
83 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
84 // Nothing to initialize.
85 return;
86 case LangOptions::TrivialAutoVarInitKind::Zero:
87 Byte = CGF.Builder.getInt8(0x00);
88 break;
89 case LangOptions::TrivialAutoVarInitKind::Pattern: {
90 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
91 Byte = llvm::dyn_cast<llvm::ConstantInt>(
92 initializationPatternFor(CGF.CGM, Int8));
93 break;
94 }
95 }
96 if (CGF.CGM.stopAutoInit())
97 return;
98 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
99 I->addAnnotationMetadata("auto-init");
100}
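// Illustrative sketch (not part of this file): the effect of the helper above
// on a __builtin_alloca call under -ftrivial-auto-var-init. Assuming
// -ftrivial-auto-var-init=zero, for
//
//   void *p = __builtin_alloca(n);
//
// the alloca is followed by a memset of n zero bytes, and that memset is
// annotated with "auto-init" so later passes can recognize it. With =pattern,
// the fill byte comes from initializationPatternFor instead.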
101
103 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
104
105 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
106 Value *CMP;
107 Value *LastInstr;
108
109 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
110 FZeroConst = ConstantVector::getSplat(
111 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
112 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
113 CMP = CGF->Builder.CreateIntrinsic(
114 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
115 {FCompInst}, nullptr);
116 } else
117 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
118
119 if (CGF->CGM.getTarget().getTriple().isDXIL())
120 LastInstr = CGF->Builder.CreateIntrinsic(
121 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
122 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
123 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
124 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
125
126 CGF->Builder.CreateCondBr(CMP, LT0, End);
127
128 CGF->Builder.SetInsertPoint(LT0);
129
130 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
131 nullptr);
132
133 LastInstr = CGF->Builder.CreateBr(End);
134
135 CGF->Builder.SetInsertPoint(End);
136 } else {
137 llvm_unreachable("Backend Codegen not supported.");
138 }
139
140 return LastInstr;
141}
142
144 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
145 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
146 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
147
148 CallArgList Args;
149 LValue Op1TmpLValue =
150 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
151 LValue Op2TmpLValue =
152 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
153
155 Args.reverseWritebacks();
156
157 Value *LowBits = nullptr;
158 Value *HighBits = nullptr;
159
160 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
161
162 llvm::Type *RetElementTy = CGF->Int32Ty;
163 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
164 RetElementTy = llvm::VectorType::get(
165 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
166 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
167
168 CallInst *CI = CGF->Builder.CreateIntrinsic(
169 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
170
171 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
172 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
173
174 } else {
175 // For non-DXIL targets we generate the instructions.
176
177 if (!Op0->getType()->isVectorTy()) {
178 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
179 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
180
181 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
182 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
183 } else {
184 int NumElements = 1;
185 if (const auto *VecTy =
186 E->getArg(0)->getType()->getAs<clang::VectorType>())
187 NumElements = VecTy->getNumElements();
188
189 FixedVectorType *Uint32VecTy =
190 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
191 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
192 if (NumElements == 1) {
193 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
194 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
195 } else {
196 SmallVector<int> EvenMask, OddMask;
197 for (int I = 0, E = NumElements; I != E; ++I) {
198 EvenMask.push_back(I * 2);
199 OddMask.push_back(I * 2 + 1);
200 }
201 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
202 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
203 }
204 }
205 }
206 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
207 auto *LastInst =
208 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
209 CGF->EmitWritebacks(Args);
210 return LastInst;
211}
212
214 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
215 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
216 "asdouble operands types mismatch");
217 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
218 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
219
220 llvm::Type *ResultType = CGF.DoubleTy;
221 int N = 1;
222 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
223 N = VTy->getNumElements();
224 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
225 }
226
227 if (CGF.CGM.getTarget().getTriple().isDXIL())
228 return CGF.Builder.CreateIntrinsic(
229 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
230 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
231
232 if (!E->getArg(0)->getType()->isVectorType()) {
233 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
234 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
235 }
236
237 SmallVector<int> Mask;
238 for (int i = 0; i < N; i++) {
239 Mask.push_back(i);
240 Mask.push_back(i + N);
241 }
242
243 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
244
245 return CGF.Builder.CreateBitCast(BitVec, ResultType);
246}
247
248/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
249/// return it as an i8 pointer.
251 LLVMContext &Context = CGF.CGM.getLLVMContext();
252 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
253 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
254 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
255 llvm::Function *F =
256 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
257 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
258 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
259}
260
261/// getBuiltinLibFunction - Given a builtin id for a function like
262/// "__builtin_fabsf", return a Function* for "fabsf".
263llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
264 unsigned BuiltinID) {
265 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
266
267 // Get the name, skip over the __builtin_ prefix (if necessary).
268 StringRef Name;
269 GlobalDecl D(FD);
270
271 // TODO: This list should be expanded or refactored after all GCC-compatible
272 // std libcall builtins are implemented.
273 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
274 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
275 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
276 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
277 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
278 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
279 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
280 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
281 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
282 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
283 {Builtin::BI__builtin_printf, "__printfieee128"},
284 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
285 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
286 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
287 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
288 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
289 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
290 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
291 {Builtin::BI__builtin_scanf, "__scanfieee128"},
292 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
293 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
294 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
295 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
296 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
297 };
298
299 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
300 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
301 // if it is 64-bit 'long double' mode.
302 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
303 {Builtin::BI__builtin_frexpl, "frexp"},
304 {Builtin::BI__builtin_ldexpl, "ldexp"},
305 {Builtin::BI__builtin_modfl, "modf"},
306 };
307
308 // If the builtin has been declared explicitly with an assembler label,
309 // use the mangled name. This differs from the plain label on platforms
310 // that prefix labels.
311 if (FD->hasAttr<AsmLabelAttr>())
312 Name = getMangledName(D);
313 else {
314 // TODO: This mutation should also be applied to targets other than PPC,
315 // once the backend supports IEEE 128-bit style libcalls.
316 if (getTriple().isPPC64() &&
317 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
318 F128Builtins.contains(BuiltinID))
319 Name = F128Builtins[BuiltinID];
320 else if (getTriple().isOSAIX() &&
321 &getTarget().getLongDoubleFormat() ==
322 &llvm::APFloat::IEEEdouble() &&
323 AIXLongDouble64Builtins.contains(BuiltinID))
324 Name = AIXLongDouble64Builtins[BuiltinID];
325 else
326 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
327 }
328
329 llvm::FunctionType *Ty =
330 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
331
332 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
333}
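// Illustrative sketch (not from this file): how the name mapping above plays
// out. On a PPC64 target whose 'long double' is IEEE quad, a call such as
//
//   __builtin_printf("%Lf\n", ld);
//
// is emitted as a call to __printfieee128 (from the F128Builtins table); on
// other targets the "__builtin_" prefix is simply stripped, so for example
// __builtin_fabsf resolves to the library function fabsf.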
334
335/// Emit the conversions required to turn the given value into an
336/// integer of the given size.
337static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
338 QualType T, llvm::IntegerType *IntType) {
339 V = CGF.EmitToMemory(V, T);
340
341 if (V->getType()->isPointerTy())
342 return CGF.Builder.CreatePtrToInt(V, IntType);
343
344 assert(V->getType() == IntType);
345 return V;
346}
347
348static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
349 QualType T, llvm::Type *ResultType) {
350 V = CGF.EmitFromMemory(V, T);
351
352 if (ResultType->isPointerTy())
353 return CGF.Builder.CreateIntToPtr(V, ResultType);
354
355 assert(V->getType() == ResultType);
356 return V;
357}
358
359static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
360 ASTContext &Ctx = CGF.getContext();
361 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
362 unsigned Bytes = Ptr.getElementType()->isPointerTy()
364 : Ptr.getElementType()->getScalarSizeInBits() / 8;
365 unsigned Align = Ptr.getAlignment().getQuantity();
366 if (Align % Bytes != 0) {
367 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
368 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
369 // Force address to be at least naturally-aligned.
370 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
371 }
372 return Ptr;
373}
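// Illustrative sketch (not from this file): the check above fires when the
// pointer operand of a __sync/__atomic builtin is under-aligned, e.g.
//
//   struct __attribute__((packed)) S { char c; int i; };
//   // __sync_fetch_and_add(&s->i, 1): &s->i is only 1-byte aligned, so
//   // warn_sync_op_misaligned is emitted and the address is treated as
//   // naturally (4-byte) aligned for the atomic operation.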
374
375/// Utility to insert an atomic instruction based on Intrinsic::ID
376/// and the expression node.
377static Value *MakeBinaryAtomicValue(
378 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
379 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
380
381 QualType T = E->getType();
382 assert(E->getArg(0)->getType()->isPointerType());
383 assert(CGF.getContext().hasSameUnqualifiedType(T,
384 E->getArg(0)->getType()->getPointeeType()));
385 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
386
387 Address DestAddr = CheckAtomicAlignment(CGF, E);
388
389 llvm::IntegerType *IntType = llvm::IntegerType::get(
390 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
391
392 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
393 llvm::Type *ValueType = Val->getType();
394 Val = EmitToInt(CGF, Val, T, IntType);
395
396 llvm::Value *Result =
397 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
398 return EmitFromInt(CGF, Result, T, ValueType);
399}
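// Illustrative sketch (not from this file): a __sync fetch-and-op builtin maps
// directly onto the atomicrmw built above. For a 32-bit int,
//
//   int old = __sync_fetch_and_add(&counter, 1);
//
// is emitted (roughly) as:
//
//   %old = atomicrmw add ptr %counter, i32 1 seq_cst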
400
402 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
403 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
404
405 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
406 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
407 LV.setNontemporal(true);
408 CGF.EmitStoreOfScalar(Val, LV, false);
409 return nullptr;
410}
411
413 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
414
415 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
416 LV.setNontemporal(true);
417 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
418}
419
421 llvm::AtomicRMWInst::BinOp Kind,
422 const CallExpr *E) {
423 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
424}
425
426/// Utility to insert an atomic instruction based Intrinsic::ID and
427/// the expression node, where the return value is the result of the
428/// operation.
430 llvm::AtomicRMWInst::BinOp Kind,
431 const CallExpr *E,
432 Instruction::BinaryOps Op,
433 bool Invert = false) {
434 QualType T = E->getType();
435 assert(E->getArg(0)->getType()->isPointerType());
436 assert(CGF.getContext().hasSameUnqualifiedType(T,
437 E->getArg(0)->getType()->getPointeeType()));
438 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
439
440 Address DestAddr = CheckAtomicAlignment(CGF, E);
441
442 llvm::IntegerType *IntType = llvm::IntegerType::get(
443 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
444
445 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
446 llvm::Type *ValueType = Val->getType();
447 Val = EmitToInt(CGF, Val, T, IntType);
448
449 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
450 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
451 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
452 if (Invert)
453 Result =
454 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
455 llvm::ConstantInt::getAllOnesValue(IntType));
456 Result = EmitFromInt(CGF, Result, T, ValueType);
457 return RValue::get(Result);
458}
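// Illustrative sketch (not from this file): the "*_and_fetch" forms return the
// new value, so the helper above re-applies the operation to the atomicrmw
// result:
//
//   int now = __sync_add_and_fetch(&counter, 1);
//   // roughly: %old = atomicrmw add ptr %counter, i32 1 seq_cst
//   //          %now = add i32 %old, 1
//
// For __sync_nand_and_fetch, the Invert flag additionally xors the combined
// result with -1 so that ~(old & val) is returned.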
459
460/// Utility to insert an atomic cmpxchg instruction.
461///
462/// @param CGF The current codegen function.
463/// @param E Builtin call expression to convert to cmpxchg.
464/// arg0 - address to operate on
465/// arg1 - value to compare with
466/// arg2 - new value
467/// @param ReturnBool Specifies whether to return success flag of
468/// cmpxchg result or the old value.
469///
470/// @returns result of cmpxchg, according to ReturnBool
471///
472/// Note: to lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
473/// the function EmitAtomicCmpXchgForMSIntrin instead.
474static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
475 bool ReturnBool) {
476 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
477 Address DestAddr = CheckAtomicAlignment(CGF, E);
478
479 llvm::IntegerType *IntType = llvm::IntegerType::get(
480 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
481
482 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
483 llvm::Type *ValueType = Cmp->getType();
484 Cmp = EmitToInt(CGF, Cmp, T, IntType);
485 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
486
487 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
488 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
489 llvm::AtomicOrdering::SequentiallyConsistent);
490 if (ReturnBool)
491 // Extract boolean success flag and zext it to int.
492 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
493 CGF.ConvertType(E->getType()));
494 else
495 // Extract old value and emit it using the same type as compare value.
496 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
497 ValueType);
498}
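// Illustrative sketch (not from this file): both __sync compare-and-swap forms
// funnel through the cmpxchg built above; ReturnBool selects which element of
// the result pair is used:
//
//   int prev = __sync_val_compare_and_swap(&x, expected, desired);  // old value
//   bool ok  = __sync_bool_compare_and_swap(&x, expected, desired); // success bit
//   // roughly: %pair = cmpxchg ptr %x, i32 %expected, i32 %desired seq_cst seq_cst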
499
500/// This function should be invoked to emit atomic cmpxchg for Microsoft's
501/// _InterlockedCompareExchange* intrinsics which have the following signature:
502/// T _InterlockedCompareExchange(T volatile *Destination,
503/// T Exchange,
504/// T Comparand);
505///
506/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
507/// cmpxchg *Destination, Comparand, Exchange.
508/// So we need to swap Comparand and Exchange when invoking
509/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
510/// function MakeAtomicCmpXchgValue since it expects the arguments to be
511/// already swapped.
512
513static
514Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
515 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
516 assert(E->getArg(0)->getType()->isPointerType());
517 assert(CGF.getContext().hasSameUnqualifiedType(
518 E->getType(), E->getArg(0)->getType()->getPointeeType()));
519 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
520 E->getArg(1)->getType()));
521 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
522 E->getArg(2)->getType()));
523
524 Address DestAddr = CheckAtomicAlignment(CGF, E);
525
526 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
527 auto *RTy = Exchange->getType();
528
529 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
530
531 if (RTy->isPointerTy()) {
532 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
533 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
534 }
535
536 // For Release ordering, the failure ordering should be Monotonic.
537 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
538 AtomicOrdering::Monotonic :
539 SuccessOrdering;
540
541 // The atomic instruction is marked volatile for consistency with MSVC. This
542 // blocks the few atomics optimizations that LLVM has. If we want to optimize
543 // _Interlocked* operations in the future, we will have to remove the volatile
544 // marker.
545 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
546 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
547 CmpXchg->setVolatile(true);
548
549 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
550 if (RTy->isPointerTy()) {
551 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
552 }
553
554 return Result;
555}
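// Illustrative sketch (not from this file): the operand swap described above
// means that
//
//   long prev = _InterlockedCompareExchange(&Dest, Exchange, Comparand);
//
// is emitted (roughly) as
//
//   %pair = cmpxchg volatile ptr %Dest, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %prev = extractvalue { i32, i1 } %pair, 0
//
// i.e. the comparand goes second and the new value third, and the old value is
// returned.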
556
557// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
558// prototyped like this:
559//
560// unsigned char _InterlockedCompareExchange128...(
561// __int64 volatile * _Destination,
562// __int64 _ExchangeHigh,
563// __int64 _ExchangeLow,
564// __int64 * _ComparandResult);
565//
566// Note that Destination is assumed to be at least 16-byte aligned, despite
567// being typed int64.
568
570 const CallExpr *E,
571 AtomicOrdering SuccessOrdering) {
572 assert(E->getNumArgs() == 4);
573 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
574 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
575 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
576 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
577
578 assert(DestPtr->getType()->isPointerTy());
579 assert(!ExchangeHigh->getType()->isPointerTy());
580 assert(!ExchangeLow->getType()->isPointerTy());
581
582 // For Release ordering, the failure ordering should be Monotonic.
583 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
584 ? AtomicOrdering::Monotonic
585 : SuccessOrdering;
586
587 // Convert to i128 pointers and values. Alignment is also overridden for
588 // destination pointer.
589 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
590 Address DestAddr(DestPtr, Int128Ty,
592 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
593
594 // (((i128)hi) << 64) | ((i128)lo)
595 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
596 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
597 ExchangeHigh =
598 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
599 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
600
601 // Load the comparand for the instruction.
602 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
603
604 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
605 SuccessOrdering, FailureOrdering);
606
607 // The atomic instruction is marked volatile for consistency with MSVC. This
608 // blocks the few atomics optimizations that LLVM has. If we want to optimize
609 // _Interlocked* operations in the future, we will have to remove the volatile
610 // marker.
611 CXI->setVolatile(true);
612
613 // Store the result as an outparameter.
614 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
615 ComparandAddr);
616
617 // Get the success boolean and zero extend it to i8.
618 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
619 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
620}
621
623 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
624 assert(E->getArg(0)->getType()->isPointerType());
625
626 auto *IntTy = CGF.ConvertType(E->getType());
627 Address DestAddr = CheckAtomicAlignment(CGF, E);
628 auto *Result = CGF.Builder.CreateAtomicRMW(
629 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
630 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
631}
632
634 CodeGenFunction &CGF, const CallExpr *E,
635 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
636 assert(E->getArg(0)->getType()->isPointerType());
637
638 auto *IntTy = CGF.ConvertType(E->getType());
639 Address DestAddr = CheckAtomicAlignment(CGF, E);
640 auto *Result = CGF.Builder.CreateAtomicRMW(
641 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
642 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
643}
644
645// Build a plain volatile load.
647 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
648 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
649 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
650 llvm::Type *ITy =
651 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
652 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
653 Load->setVolatile(true);
654 return Load;
655}
656
657// Build a plain volatile store.
659 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
660 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
661 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
662 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
663 llvm::StoreInst *Store =
664 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
665 Store->setVolatile(true);
666 return Store;
667}
668
669// Emit a simple mangled intrinsic that has 1 argument and a return type
670// matching the argument type. Depending on mode, this may be a constrained
671// floating-point intrinsic.
673 const CallExpr *E, unsigned IntrinsicID,
674 unsigned ConstrainedIntrinsicID) {
675 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
676
677 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
678 if (CGF.Builder.getIsFPConstrained()) {
679 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
680 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
681 } else {
682 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
683 return CGF.Builder.CreateCall(F, Src0);
684 }
685}
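// Illustrative sketch (not from this file): with strict floating-point
// semantics in effect (e.g. '#pragma STDC FENV_ACCESS ON'), a builtin such as
// __builtin_sqrt(x) takes the constrained branch above and is emitted as a
// call to llvm.experimental.constrained.sqrt.f64 (carrying rounding/exception
// metadata) instead of llvm.sqrt.f64.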
686
687// Emit an intrinsic that has 2 operands of the same type as its result.
688// Depending on mode, this may be a constrained floating-point intrinsic.
690 const CallExpr *E, unsigned IntrinsicID,
691 unsigned ConstrainedIntrinsicID) {
692 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
693 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
694
695 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
696 if (CGF.Builder.getIsFPConstrained()) {
697 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
698 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
699 } else {
700 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
701 return CGF.Builder.CreateCall(F, { Src0, Src1 });
702 }
703}
704
705// Has second type mangled argument.
707 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
708 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
709 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
710 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
711
712 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
713 if (CGF.Builder.getIsFPConstrained()) {
714 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
715 {Src0->getType(), Src1->getType()});
716 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
717 }
718
719 Function *F =
720 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
721 return CGF.Builder.CreateCall(F, {Src0, Src1});
722}
723
724// Emit an intrinsic that has 3 operands of the same type as its result.
725// Depending on mode, this may be a constrained floating-point intrinsic.
727 const CallExpr *E, unsigned IntrinsicID,
728 unsigned ConstrainedIntrinsicID) {
729 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
730 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
731 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
732
733 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
734 if (CGF.Builder.getIsFPConstrained()) {
735 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
736 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
737 } else {
738 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
739 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
740 }
741}
742
743// Emit an intrinsic where all operands are of the same type as the result.
744// Depending on mode, this may be a constrained floating-point intrinsic.
746 unsigned IntrinsicID,
747 unsigned ConstrainedIntrinsicID,
748 llvm::Type *Ty,
749 ArrayRef<Value *> Args) {
750 Function *F;
751 if (CGF.Builder.getIsFPConstrained())
752 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
753 else
754 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
755
756 if (CGF.Builder.getIsFPConstrained())
757 return CGF.Builder.CreateConstrainedFPCall(F, Args);
758 else
759 return CGF.Builder.CreateCall(F, Args);
760}
761
762// Emit a simple intrinsic that has N scalar arguments and a return type
763// matching the argument type. It is assumed that only the first argument is
764// overloaded.
765template <unsigned N>
767 const CallExpr *E,
768 unsigned IntrinsicID,
769 llvm::StringRef Name = "") {
770 static_assert(N, "expect non-empty argument");
771 SmallVector<Value *, N> Args;
772 for (unsigned I = 0; I < N; ++I)
773 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
774 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
775 return CGF.Builder.CreateCall(F, Args, Name);
776}
777
778// Emit an intrinsic that has 1 float or double operand, and 1 integer.
780 const CallExpr *E,
781 unsigned IntrinsicID) {
782 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
783 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
784
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1});
787}
788
789// Emit an intrinsic that has overloaded integer result and fp operand.
790static Value *
792 unsigned IntrinsicID,
793 unsigned ConstrainedIntrinsicID) {
794 llvm::Type *ResultType = CGF.ConvertType(E->getType());
795 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
796
797 if (CGF.Builder.getIsFPConstrained()) {
798 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
799 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
800 {ResultType, Src0->getType()});
801 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
802 } else {
803 Function *F =
804 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
805 return CGF.Builder.CreateCall(F, Src0);
806 }
807}
808
810 llvm::Intrinsic::ID IntrinsicID) {
811 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
812 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
813
814 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
815 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
816 llvm::Function *F =
817 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
818 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
819
820 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
821 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
822 CGF.EmitStoreOfScalar(Exp, LV);
823
824 return CGF.Builder.CreateExtractValue(Call, 0);
825}
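// Illustrative sketch (not from this file): the helper above lowers
//
//   int exp;
//   double m = __builtin_frexp(x, &exp);
//
// to a call to the llvm.frexp intrinsic, which returns a {mantissa, exponent}
// pair; element 1 is stored through the int* out-parameter and element 0 is
// returned as the mantissa.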
826
828 llvm::Intrinsic::ID IntrinsicID) {
829 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
830 llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
831 llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
832
833 llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
834 llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
835
836 llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
837 llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
838
839 QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
840 LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
841 LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
842
843 llvm::StoreInst *StoreSin =
844 CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
845 llvm::StoreInst *StoreCos =
846 CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
847
848 // Mark the two stores as non-aliasing with each other. The order of stores
849 // emitted by this builtin is arbitrary; enforcing a particular order would
850 // prevent optimizations later on.
851 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
852 MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
853 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
854 MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
855 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
856 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
857}
858
859/// EmitFAbs - Emit a call to @llvm.fabs().
861 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
862 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
863 Call->setDoesNotAccessMemory();
864 return Call;
865}
866
867/// Emit the computation of the sign bit for a floating point value. Returns
868/// the i1 sign bit value.
870 LLVMContext &C = CGF.CGM.getLLVMContext();
871
872 llvm::Type *Ty = V->getType();
873 int Width = Ty->getPrimitiveSizeInBits();
874 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
875 V = CGF.Builder.CreateBitCast(V, IntTy);
876 if (Ty->isPPC_FP128Ty()) {
877 // We want the sign bit of the higher-order double. The bitcast we just
878 // did works as if the double-double was stored to memory and then
879 // read as an i128. The "store" will put the higher-order double in the
880 // lower address in both little- and big-Endian modes, but the "load"
881 // will treat those bits as a different part of the i128: the low bits in
882 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
883 // we need to shift the high bits down to the low before truncating.
884 Width >>= 1;
885 if (CGF.getTarget().isBigEndian()) {
886 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
887 V = CGF.Builder.CreateLShr(V, ShiftCst);
888 }
889 // We are truncating value in order to extract the higher-order
890 // double, which we will be using to extract the sign from.
891 IntTy = llvm::IntegerType::get(C, Width);
892 V = CGF.Builder.CreateTrunc(V, IntTy);
893 }
894 Value *Zero = llvm::Constant::getNullValue(IntTy);
895 return CGF.Builder.CreateICmpSLT(V, Zero);
896}
897
898/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
899/// hidden pointer). This is used to check that annotating FP libcalls (which
900/// could set `errno`) with "int" TBAA metadata is safe. If any floating-point
901/// arguments are passed indirectly, setup for the call could be incorrectly
902/// optimized out.
903static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo) {
904 auto IsIndirect = [&](ABIArgInfo const &info) {
905 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
906 };
907 return !IsIndirect(FnInfo.getReturnInfo()) &&
908 llvm::none_of(FnInfo.arguments(),
909 [&](CGFunctionInfoArgInfo const &ArgInfo) {
910 return IsIndirect(ArgInfo.info);
911 });
912}
913
915 const CallExpr *E, llvm::Constant *calleeValue) {
916 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
917 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
918 llvm::CallBase *callOrInvoke = nullptr;
919 CGFunctionInfo const *FnInfo = nullptr;
920 RValue Call =
921 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
922 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
923
924 if (unsigned BuiltinID = FD->getBuiltinID()) {
925 // Check whether a FP math builtin function, such as BI__builtin_expf
926 ASTContext &Context = CGF.getContext();
928 bool ConstWithoutErrnoAndExceptions =
929 Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
930 // Restrict to targets with errno; for example, macOS doesn't set errno.
931 // TODO: Support builtin functions with complex return types, e.g. cacosh.
931 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
932 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
933 HasNoIndirectArgumentsOrResults(*FnInfo)) {
934 // Emit "int" TBAA metadata on FP math libcalls.
935 clang::QualType IntTy = Context.IntTy;
936 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
937 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
938 }
939 }
940 return Call;
941}
942
943/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
944/// depending on IntrinsicID.
945///
946/// \arg CGF The current codegen function.
947/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
948/// \arg X The first argument to the llvm.*.with.overflow.*.
949/// \arg Y The second argument to the llvm.*.with.overflow.*.
950/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
951/// \returns The result (i.e. sum/product) returned by the intrinsic.
952static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
953 const llvm::Intrinsic::ID IntrinsicID,
954 llvm::Value *X, llvm::Value *Y,
955 llvm::Value *&Carry) {
956 // Make sure we have integers of the same width.
957 assert(X->getType() == Y->getType() &&
958 "Arguments must be the same type. (Did you forget to make sure both "
959 "arguments have the same integer width?)");
960
961 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
962 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
963 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
964 return CGF.Builder.CreateExtractValue(Tmp, 0);
965}
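// Illustrative sketch (not from this file): this helper backs the checked
// arithmetic builtins, e.g.
//
//   unsigned r;
//   if (__builtin_uadd_overflow(a, b, &r)) { /* handle overflow */ }
//
// which is emitted (roughly) as
//
//   %pair  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %r     = extractvalue { i32, i1 } %pair, 0
//   %carry = extractvalue { i32, i1 } %pair, 1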
966
967static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
968 int low, int high) {
969 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
970 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
971 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
972 Call->addRangeRetAttr(CR);
973 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
974 return Call;
975}
976
977namespace {
978 struct WidthAndSignedness {
979 unsigned Width;
980 bool Signed;
981 };
982}
983
984static WidthAndSignedness
986 const clang::QualType Type) {
987 assert(Type->isIntegerType() && "Given type is not an integer.");
988 unsigned Width = context.getIntWidth(Type);
989 bool Signed = Type->isSignedIntegerType();
990 return {Width, Signed};
991}
992
993// Given one or more integer types, this function produces an integer type that
994// encompasses them: any value in one of the given types could be expressed in
995// the encompassing type.
996static struct WidthAndSignedness
997EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
998 assert(Types.size() > 0 && "Empty list of types.");
999
1000 // If any of the given types is signed, we must return a signed type.
1001 bool Signed = false;
1002 for (const auto &Type : Types) {
1003 Signed |= Type.Signed;
1004 }
1005
1006 // The encompassing type must have a width greater than or equal to the width
1007 // of the specified types. Additionally, if the encompassing type is signed,
1008 // its width must be strictly greater than the width of any unsigned types
1009 // given.
1010 unsigned Width = 0;
1011 for (const auto &Type : Types) {
1012 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
1013 if (Width < MinWidth) {
1014 Width = MinWidth;
1015 }
1016 }
1017
1018 return {Width, Signed};
1019}
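// Worked example (not from this file): for the type-generic
// __builtin_add_overflow, the operand and result types are combined with the
// rule above. Given 'unsigned int' (32-bit, unsigned) and 'short' (16-bit,
// signed), the result must be signed, so the unsigned 32-bit member needs
// 32 + 1 bits; the encompassing type is therefore a signed 33-bit integer.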
1020
1021Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1022 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1023 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1024 ArgValue);
1025}
1026
1027/// Checks if using the result of __builtin_object_size(p, @p From) in place of
1028/// __builtin_object_size(p, @p To) is correct
1029static bool areBOSTypesCompatible(int From, int To) {
1030 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1031 // Type=2 identically. Encoding this implementation detail here may make
1032 // improving __builtin_object_size difficult in the future, so it's omitted.
1033 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1034}
1035
1036static llvm::Value *
1037getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1038 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1039}
1040
1041llvm::Value *
1042CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1043 llvm::IntegerType *ResType,
1044 llvm::Value *EmittedE,
1045 bool IsDynamic) {
1046 uint64_t ObjectSize;
1047 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1048 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1049 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1050}
1051
1052const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
1053 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1054 uint64_t &Offset) {
1055 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1056 getLangOpts().getStrictFlexArraysLevel();
1057 uint32_t FieldNo = 0;
1058
1059 if (RD->isImplicit())
1060 return nullptr;
1061
1062 for (const FieldDecl *FD : RD->fields()) {
1063 if ((!FAMDecl || FD == FAMDecl) &&
1065 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1066 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1067 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1068 Offset += Layout.getFieldOffset(FieldNo);
1069 return FD;
1070 }
1071
1072 QualType Ty = FD->getType();
1073 if (Ty->isRecordType()) {
1074 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
1075 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1076 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1077 Offset += Layout.getFieldOffset(FieldNo);
1078 return Field;
1079 }
1080 }
1081
1082 if (!RD->isUnion())
1083 ++FieldNo;
1084 }
1085
1086 return nullptr;
1087}
1088
1089static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1090 unsigned Num = 0;
1091
1092 for (const FieldDecl *FD : RD->fields()) {
1093 if (FD->getType()->isCountAttributedType())
1094 return ++Num;
1095
1096 QualType Ty = FD->getType();
1097 if (Ty->isRecordType())
1098 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
1099 }
1100
1101 return Num;
1102}
1103
1104llvm::Value *
1105CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1106 llvm::IntegerType *ResType) {
1107 // The code generated here calculates the size of a struct with a flexible
1108 // array member that uses the counted_by attribute. There are three cases
1109 // we handle:
1110 //
1111 // struct s {
1112 // unsigned long flags;
1113 // int count;
1114 // int array[] __attribute__((counted_by(count)));
1115 // }
1116 //
1117 // 1) bdos of the flexible array itself:
1118 //
1119 // __builtin_dynamic_object_size(p->array, 1) ==
1120 // p->count * sizeof(*p->array)
1121 //
1122 // 2) bdos of a pointer into the flexible array:
1123 //
1124 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1125 // (p->count - 42) * sizeof(*p->array)
1126 //
1127 // 3) bdos of the whole struct, including the flexible array:
1128 //
1129 // __builtin_dynamic_object_size(p, 1) ==
1130 // max(sizeof(struct s),
1131 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1132 //
1133 ASTContext &Ctx = getContext();
1134 const Expr *Base = E->IgnoreParenImpCasts();
1135 const Expr *Idx = nullptr;
1136
1137 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1138 UO && UO->getOpcode() == UO_AddrOf) {
1139 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1140 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1141 Base = ASE->getBase()->IgnoreParenImpCasts();
1142 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1143
1144 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1145 int64_t Val = IL->getValue().getSExtValue();
1146 if (Val < 0)
1147 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1148
1149 if (Val == 0)
1150 // The index is 0, so we don't need to take it into account.
1151 Idx = nullptr;
1152 }
1153 } else {
1154 // Potential pointer to another element in the struct.
1155 Base = SubExpr;
1156 }
1157 }
1158
1159 // Get the flexible array member Decl.
1160 const RecordDecl *OuterRD = nullptr;
1161 const FieldDecl *FAMDecl = nullptr;
1162 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1163 // Check if \p Base is referencing the FAM itself.
1164 const ValueDecl *VD = ME->getMemberDecl();
1165 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
1166 FAMDecl = dyn_cast<FieldDecl>(VD);
1167 if (!FAMDecl)
1168 return nullptr;
1169 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1170 // Check if we're pointing to the whole struct.
1171 QualType Ty = DRE->getDecl()->getType();
1172 if (Ty->isPointerType())
1173 Ty = Ty->getPointeeType();
1174 OuterRD = Ty->getAsRecordDecl();
1175
1176 // If we have a situation like this:
1177 //
1178 // struct union_of_fams {
1179 // int flags;
1180 // union {
1181 // signed char normal_field;
1182 // struct {
1183 // int count1;
1184 // int arr1[] __counted_by(count1);
1185 // };
1186 // struct {
1187 // signed char count2;
1188 // int arr2[] __counted_by(count2);
1189 // };
1190 // };
1191 // };
1192 //
1193 // We don't know which 'count' to use in this scenario:
1194 //
1195 // size_t get_size(struct union_of_fams *p) {
1196 // return __builtin_dynamic_object_size(p, 1);
1197 // }
1198 //
1199 // Instead of calculating a wrong number, we give up.
1200 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1201 return nullptr;
1202 }
1203
1204 if (!OuterRD)
1205 return nullptr;
1206
1207 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1208 // get its offset.
1209 uint64_t Offset = 0;
1210 FAMDecl =
1211 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1212 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1213
1214 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1215 // No flexible array member found or it doesn't have the "counted_by"
1216 // attribute.
1217 return nullptr;
1218
1219 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1220 if (!CountedByFD)
1221 // Can't find the field referenced by the "counted_by" attribute.
1222 return nullptr;
1223
1224 if (isa<DeclRefExpr>(Base))
1225 // The whole struct is specified in the __bdos. The calculation of the
1226 // whole size of the structure can be done in two ways:
1227 //
1228 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1229 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1230 //
1231 // The first will add additional padding after the end of the array
1232 // allocation, while the second method is more precise but not quite what
1233 // programmers expect. See
1234 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1235 // discussion of the topic.
1236 //
1237 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1238 // structure. Therefore, because of the above issue, we'll choose to match
1239 // what GCC does for consistency's sake.
1240 return nullptr;
1241
1242 // Build a load of the counted_by field.
1243 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1244 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1245 if (!CountedByInst)
1246 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1247
1248 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1249
1250 // Build a load of the index and subtract it from the count.
1251 Value *IdxInst = nullptr;
1252 if (Idx) {
1253 if (Idx->HasSideEffects(getContext()))
1254 // We can't have side-effects.
1255 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1256
1257 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1258 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1259 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1260
1261 // We go ahead with the calculation here. If the index turns out to be
1262 // negative, we'll catch it at the end.
1263 CountedByInst =
1264 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1265 }
1266
1267 // Calculate how large the flexible array member is in bytes.
1268 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1269 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1270 llvm::Constant *ElemSize =
1271 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1272 Value *Res =
1273 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1274 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1275
1276 // A negative \p IdxInst or \p CountedByInst means that the index lands
1277 // outside of the flexible array member. If that's the case, we want to
1278 // return 0.
1279 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1280 if (IdxInst)
1281 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1282
1283 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1284}
1285
1286/// Returns a Value corresponding to the size of the given expression.
1287/// This Value may be either of the following:
1288/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1289/// it)
1290/// - A call to the @llvm.objectsize intrinsic
1291///
1292/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1293/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1294/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1295llvm::Value *
1296CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1297 llvm::IntegerType *ResType,
1298 llvm::Value *EmittedE, bool IsDynamic) {
1299 // We need to reference an argument if the pointer is a parameter with the
1300 // pass_object_size attribute.
1301 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1302 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1303 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1304 if (Param != nullptr && PS != nullptr &&
1305 areBOSTypesCompatible(PS->getType(), Type)) {
1306 auto Iter = SizeArguments.find(Param);
1307 assert(Iter != SizeArguments.end());
1308
1309 const ImplicitParamDecl *D = Iter->second;
1310 auto DIter = LocalDeclMap.find(D);
1311 assert(DIter != LocalDeclMap.end());
1312
1313 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1314 getContext().getSizeType(), E->getBeginLoc());
1315 }
1316 }
1317
1318 if (IsDynamic) {
1319 // Emit special code for a flexible array member with the "counted_by"
1320 // attribute.
1321 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1322 return V;
1323 }
1324
1325 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1326 // evaluate E for side-effects. In either case, we shouldn't lower to
1327 // @llvm.objectsize.
1328 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1329 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1330
1331 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1332 assert(Ptr->getType()->isPointerTy() &&
1333 "Non-pointer passed to __builtin_object_size?");
1334
1335 Function *F =
1336 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1337
1338 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1339 Value *Min = Builder.getInt1((Type & 2) != 0);
1340 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1341 Value *NullIsUnknown = Builder.getTrue();
1342 Value *Dynamic = Builder.getInt1(IsDynamic);
1343 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1344}
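// Illustrative sketch (not from this file): the pass_object_size path above
// answers __builtin_object_size from a hidden size argument rather than from
// @llvm.objectsize:
//
//   void fill(char *buf __attribute__((pass_object_size(0))), char c) {
//     // __builtin_object_size(buf, 0) here loads the implicit size parameter
//     // recorded in SizeArguments instead of emitting an intrinsic call.
//   }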
1345
1346namespace {
1347/// A struct to generically describe a bit test intrinsic.
1348struct BitTest {
1349 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1350 enum InterlockingKind : uint8_t {
1351 Unlocked,
1352 Sequential,
1353 Acquire,
1354 Release,
1355 NoFence
1356 };
1357
1358 ActionKind Action;
1359 InterlockingKind Interlocking;
1360 bool Is64Bit;
1361
1362 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1363};
1364
1365} // namespace
1366
1367BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1368 switch (BuiltinID) {
1369 // Main portable variants.
1370 case Builtin::BI_bittest:
1371 return {TestOnly, Unlocked, false};
1372 case Builtin::BI_bittestandcomplement:
1373 return {Complement, Unlocked, false};
1374 case Builtin::BI_bittestandreset:
1375 return {Reset, Unlocked, false};
1376 case Builtin::BI_bittestandset:
1377 return {Set, Unlocked, false};
1378 case Builtin::BI_interlockedbittestandreset:
1379 return {Reset, Sequential, false};
1380 case Builtin::BI_interlockedbittestandset:
1381 return {Set, Sequential, false};
1382
1383 // X86-specific 64-bit variants.
1384 case Builtin::BI_bittest64:
1385 return {TestOnly, Unlocked, true};
1386 case Builtin::BI_bittestandcomplement64:
1387 return {Complement, Unlocked, true};
1388 case Builtin::BI_bittestandreset64:
1389 return {Reset, Unlocked, true};
1390 case Builtin::BI_bittestandset64:
1391 return {Set, Unlocked, true};
1392 case Builtin::BI_interlockedbittestandreset64:
1393 return {Reset, Sequential, true};
1394 case Builtin::BI_interlockedbittestandset64:
1395 return {Set, Sequential, true};
1396
1397 // ARM/AArch64-specific ordering variants.
1398 case Builtin::BI_interlockedbittestandset_acq:
1399 return {Set, Acquire, false};
1400 case Builtin::BI_interlockedbittestandset_rel:
1401 return {Set, Release, false};
1402 case Builtin::BI_interlockedbittestandset_nf:
1403 return {Set, NoFence, false};
1404 case Builtin::BI_interlockedbittestandreset_acq:
1405 return {Reset, Acquire, false};
1406 case Builtin::BI_interlockedbittestandreset_rel:
1407 return {Reset, Release, false};
1408 case Builtin::BI_interlockedbittestandreset_nf:
1409 return {Reset, NoFence, false};
1410 }
1411 llvm_unreachable("expected only bittest intrinsics");
1412}
1413
1414static char bitActionToX86BTCode(BitTest::ActionKind A) {
1415 switch (A) {
1416 case BitTest::TestOnly: return '\0';
1417 case BitTest::Complement: return 'c';
1418 case BitTest::Reset: return 'r';
1419 case BitTest::Set: return 's';
1420 }
1421 llvm_unreachable("invalid action");
1422}
1423
1425 BitTest BT,
1426 const CallExpr *E, Value *BitBase,
1427 Value *BitPos) {
1428 char Action = bitActionToX86BTCode(BT.Action);
1429 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1430
1431 // Build the assembly.
1432 SmallString<64> Asm;
1433 raw_svector_ostream AsmOS(Asm);
1434 if (BT.Interlocking != BitTest::Unlocked)
1435 AsmOS << "lock ";
1436 AsmOS << "bt";
1437 if (Action)
1438 AsmOS << Action;
1439 AsmOS << SizeSuffix << " $2, ($1)";
1440
1441 // Build the constraints. FIXME: We should support immediates when possible.
1442 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1443 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1444 if (!MachineClobbers.empty()) {
1445 Constraints += ',';
1446 Constraints += MachineClobbers;
1447 }
1448 llvm::IntegerType *IntType = llvm::IntegerType::get(
1449 CGF.getLLVMContext(),
1450 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1451 llvm::FunctionType *FTy =
1452 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1453
1454 llvm::InlineAsm *IA =
1455 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1456 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1457}
1458
1459static llvm::AtomicOrdering
1460getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1461 switch (I) {
1462 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1463 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1464 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1465 case BitTest::Release: return llvm::AtomicOrdering::Release;
1466 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1467 }
1468 llvm_unreachable("invalid interlocking");
1469}
1470
1471/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1472/// bits and a bit position and read and optionally modify the bit at that
1473/// position. The position index can be arbitrarily large, i.e. it can be larger
1474/// than 31 or 63, so we need an indexed load in the general case.
1475static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1476 unsigned BuiltinID,
1477 const CallExpr *E) {
1478 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1479 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1480
1481 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1482
1483 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1484 // indexing operation internally. Use them if possible.
1485 if (CGF.getTarget().getTriple().isX86())
1486 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1487
1488 // Otherwise, use generic code to load one byte and test the bit. Use all but
1489 // the bottom three bits as the array index, and the bottom three bits to form
1490 // a mask.
1491 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1492 Value *ByteIndex = CGF.Builder.CreateAShr(
1493 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1494 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1495 "bittest.byteaddr"),
1496 CGF.Int8Ty, CharUnits::One());
1497 Value *PosLow =
1498 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1499 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1500
1501 // The updating instructions will need a mask.
1502 Value *Mask = nullptr;
1503 if (BT.Action != BitTest::TestOnly) {
1504 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1505 "bittest.mask");
1506 }
1507
1508 // Check the action and ordering of the interlocked intrinsics.
1509 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1510
1511 Value *OldByte = nullptr;
1512 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1513 // Emit a combined atomicrmw load/store operation for the interlocked
1514 // intrinsics.
1515 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1516 if (BT.Action == BitTest::Reset) {
1517 Mask = CGF.Builder.CreateNot(Mask);
1518 RMWOp = llvm::AtomicRMWInst::And;
1519 }
1520 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1521 } else {
1522 // Emit a plain load for the non-interlocked intrinsics.
1523 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1524 Value *NewByte = nullptr;
1525 switch (BT.Action) {
1526 case BitTest::TestOnly:
1527 // Don't store anything.
1528 break;
1529 case BitTest::Complement:
1530 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1531 break;
1532 case BitTest::Reset:
1533 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1534 break;
1535 case BitTest::Set:
1536 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1537 break;
1538 }
1539 if (NewByte)
1540 CGF.Builder.CreateStore(NewByte, ByteAddr);
1541 }
1542
1543 // Whether we loaded the old byte with a plain load or an atomicrmw, shift
1544 // the bit into the low position and mask it to 0 or 1.
1545 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1546 return CGF.Builder.CreateAnd(
1547 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1548}
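// Illustrative sketch, not part of CGBuiltin.cpp: what the generic (non-x86,
// non-interlocked) lowering above computes for a "set" action, written as
// plain C. The helper name is hypothetical.
static unsigned char sketch_generic_bittestandset(unsigned char *BitBase,
                                                  long BitPos) {
  unsigned char *ByteAddr = BitBase + (BitPos >> 3); // bittest.byteidx/byteaddr
  unsigned char PosLow = (unsigned char)BitPos & 0x7;
  unsigned char Mask = (unsigned char)(1 << PosLow); // bittest.mask
  unsigned char Old = *ByteAddr;                     // bittest.byte
  *ByteAddr = Old | Mask;                            // BitTest::Set store
  return (Old >> PosLow) & 1;                        // bittest.shr / bittest.res
}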
1549
1551 unsigned BuiltinID,
1552 const CallExpr *E) {
1553 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1554
1556 raw_svector_ostream AsmOS(Asm);
1557 llvm::IntegerType *RetType = CGF.Int32Ty;
1558
1559 switch (BuiltinID) {
1560 case clang::PPC::BI__builtin_ppc_ldarx:
1561 AsmOS << "ldarx ";
1562 RetType = CGF.Int64Ty;
1563 break;
1564 case clang::PPC::BI__builtin_ppc_lwarx:
1565 AsmOS << "lwarx ";
1566 RetType = CGF.Int32Ty;
1567 break;
1568 case clang::PPC::BI__builtin_ppc_lharx:
1569 AsmOS << "lharx ";
1570 RetType = CGF.Int16Ty;
1571 break;
1572 case clang::PPC::BI__builtin_ppc_lbarx:
1573 AsmOS << "lbarx ";
1574 RetType = CGF.Int8Ty;
1575 break;
1576 default:
1577 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1578 }
1579
1580 AsmOS << "$0, ${1:y}";
1581
1582 std::string Constraints = "=r,*Z,~{memory}";
1583 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1584 if (!MachineClobbers.empty()) {
1585 Constraints += ',';
1586 Constraints += MachineClobbers;
1587 }
1588
1589 llvm::Type *PtrType = CGF.UnqualPtrTy;
1590 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1591
1592 llvm::InlineAsm *IA =
1593 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1594 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1595 CI->addParamAttr(
1596 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1597 return CI;
1598}
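// Illustrative sketch, not part of CGBuiltin.cpp: for __builtin_ppc_lwarx the
// code above emits the inline asm "lwarx $0, ${1:y}" with constraints
// "=r,*Z,~{memory}" (plus any machine clobbers), an i32 result, and an
// elementtype attribute on the pointer argument. A use from C might look like
// the following; the exact prototype shown here is an assumption.
int sketch_load_reserve(volatile int *Addr) {
  return __builtin_ppc_lwarx(Addr); // lowered to the "lwarx" inline asm above
}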
1599
1600namespace {
1601enum class MSVCSetJmpKind {
1602 _setjmpex,
1603 _setjmp3,
1604 _setjmp
1605};
1606}
1607
1608/// MSVC handles setjmp a bit differently on different platforms. On every
1609/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1610/// parameters can be passed as variadic arguments, but we always pass none.
1611static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1612 const CallExpr *E) {
1613 llvm::Value *Arg1 = nullptr;
1614 llvm::Type *Arg1Ty = nullptr;
1615 StringRef Name;
1616 bool IsVarArg = false;
1617 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1618 Name = "_setjmp3";
1619 Arg1Ty = CGF.Int32Ty;
1620 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1621 IsVarArg = true;
1622 } else {
1623 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1624 Arg1Ty = CGF.Int8PtrTy;
1625 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1626 Arg1 = CGF.Builder.CreateCall(
1627 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1628 } else
1629 Arg1 = CGF.Builder.CreateCall(
1630 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1631 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1632 }
1633
1634 // Mark the call site and declaration with ReturnsTwice.
1635 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1636 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1637 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1638 llvm::Attribute::ReturnsTwice);
1639 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1640 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1641 ReturnsTwiceAttr, /*Local=*/true);
1642
1643 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1644 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1645 llvm::Value *Args[] = {Buf, Arg1};
1646 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1647 CB->setAttributes(ReturnsTwiceAttr);
1648 return RValue::get(CB);
1649}
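// Illustrative sketch, not part of CGBuiltin.cpp: the shapes of the runtime
// calls built above, written as hypothetical prototypes (these are models, not
// the real CRT declarations). Both the call and the declaration are marked
// ReturnsTwice.
int _setjmp3(void *Buf, int Count, ...); // 32-bit x86: Count is always 0 here
int _setjmpex(void *Buf, void *Arg1);    // elsewhere: Arg1 is the frame address,
                                         // or llvm.sponentry on AArch64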
1650
1651 // Many of the MSVC builtins are available on x64, ARM, and AArch64; to avoid
1652 // repeating code, we handle them here.
1693 __fastfail,
1694};
1695
1696static std::optional<CodeGenFunction::MSVCIntrin>
1697translateArmToMsvcIntrin(unsigned BuiltinID) {
1698 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1699 switch (BuiltinID) {
1700 default:
1701 return std::nullopt;
1702 case clang::ARM::BI_BitScanForward:
1703 case clang::ARM::BI_BitScanForward64:
1704 return MSVCIntrin::_BitScanForward;
1705 case clang::ARM::BI_BitScanReverse:
1706 case clang::ARM::BI_BitScanReverse64:
1707 return MSVCIntrin::_BitScanReverse;
1708 case clang::ARM::BI_InterlockedAnd64:
1709 return MSVCIntrin::_InterlockedAnd;
1710 case clang::ARM::BI_InterlockedExchange64:
1711 return MSVCIntrin::_InterlockedExchange;
1712 case clang::ARM::BI_InterlockedExchangeAdd64:
1713 return MSVCIntrin::_InterlockedExchangeAdd;
1714 case clang::ARM::BI_InterlockedExchangeSub64:
1715 return MSVCIntrin::_InterlockedExchangeSub;
1716 case clang::ARM::BI_InterlockedOr64:
1717 return MSVCIntrin::_InterlockedOr;
1718 case clang::ARM::BI_InterlockedXor64:
1719 return MSVCIntrin::_InterlockedXor;
1720 case clang::ARM::BI_InterlockedDecrement64:
1721 return MSVCIntrin::_InterlockedDecrement;
1722 case clang::ARM::BI_InterlockedIncrement64:
1723 return MSVCIntrin::_InterlockedIncrement;
1724 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1725 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1726 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1727 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1728 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1729 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1730 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1731 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1732 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1733 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1734 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1735 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1736 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1737 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1738 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1739 case clang::ARM::BI_InterlockedExchange8_acq:
1740 case clang::ARM::BI_InterlockedExchange16_acq:
1741 case clang::ARM::BI_InterlockedExchange_acq:
1742 case clang::ARM::BI_InterlockedExchange64_acq:
1743 case clang::ARM::BI_InterlockedExchangePointer_acq:
1744 return MSVCIntrin::_InterlockedExchange_acq;
1745 case clang::ARM::BI_InterlockedExchange8_rel:
1746 case clang::ARM::BI_InterlockedExchange16_rel:
1747 case clang::ARM::BI_InterlockedExchange_rel:
1748 case clang::ARM::BI_InterlockedExchange64_rel:
1749 case clang::ARM::BI_InterlockedExchangePointer_rel:
1750 return MSVCIntrin::_InterlockedExchange_rel;
1751 case clang::ARM::BI_InterlockedExchange8_nf:
1752 case clang::ARM::BI_InterlockedExchange16_nf:
1753 case clang::ARM::BI_InterlockedExchange_nf:
1754 case clang::ARM::BI_InterlockedExchange64_nf:
1755 case clang::ARM::BI_InterlockedExchangePointer_nf:
1756 return MSVCIntrin::_InterlockedExchange_nf;
1757 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1758 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1759 case clang::ARM::BI_InterlockedCompareExchange_acq:
1760 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1761 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1762 return MSVCIntrin::_InterlockedCompareExchange_acq;
1763 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1764 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1765 case clang::ARM::BI_InterlockedCompareExchange_rel:
1766 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1767 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1768 return MSVCIntrin::_InterlockedCompareExchange_rel;
1769 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1770 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1771 case clang::ARM::BI_InterlockedCompareExchange_nf:
1772 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1773 return MSVCIntrin::_InterlockedCompareExchange_nf;
1774 case clang::ARM::BI_InterlockedOr8_acq:
1775 case clang::ARM::BI_InterlockedOr16_acq:
1776 case clang::ARM::BI_InterlockedOr_acq:
1777 case clang::ARM::BI_InterlockedOr64_acq:
1778 return MSVCIntrin::_InterlockedOr_acq;
1779 case clang::ARM::BI_InterlockedOr8_rel:
1780 case clang::ARM::BI_InterlockedOr16_rel:
1781 case clang::ARM::BI_InterlockedOr_rel:
1782 case clang::ARM::BI_InterlockedOr64_rel:
1783 return MSVCIntrin::_InterlockedOr_rel;
1784 case clang::ARM::BI_InterlockedOr8_nf:
1785 case clang::ARM::BI_InterlockedOr16_nf:
1786 case clang::ARM::BI_InterlockedOr_nf:
1787 case clang::ARM::BI_InterlockedOr64_nf:
1788 return MSVCIntrin::_InterlockedOr_nf;
1789 case clang::ARM::BI_InterlockedXor8_acq:
1790 case clang::ARM::BI_InterlockedXor16_acq:
1791 case clang::ARM::BI_InterlockedXor_acq:
1792 case clang::ARM::BI_InterlockedXor64_acq:
1793 return MSVCIntrin::_InterlockedXor_acq;
1794 case clang::ARM::BI_InterlockedXor8_rel:
1795 case clang::ARM::BI_InterlockedXor16_rel:
1796 case clang::ARM::BI_InterlockedXor_rel:
1797 case clang::ARM::BI_InterlockedXor64_rel:
1798 return MSVCIntrin::_InterlockedXor_rel;
1799 case clang::ARM::BI_InterlockedXor8_nf:
1800 case clang::ARM::BI_InterlockedXor16_nf:
1801 case clang::ARM::BI_InterlockedXor_nf:
1802 case clang::ARM::BI_InterlockedXor64_nf:
1803 return MSVCIntrin::_InterlockedXor_nf;
1804 case clang::ARM::BI_InterlockedAnd8_acq:
1805 case clang::ARM::BI_InterlockedAnd16_acq:
1806 case clang::ARM::BI_InterlockedAnd_acq:
1807 case clang::ARM::BI_InterlockedAnd64_acq:
1808 return MSVCIntrin::_InterlockedAnd_acq;
1809 case clang::ARM::BI_InterlockedAnd8_rel:
1810 case clang::ARM::BI_InterlockedAnd16_rel:
1811 case clang::ARM::BI_InterlockedAnd_rel:
1812 case clang::ARM::BI_InterlockedAnd64_rel:
1813 return MSVCIntrin::_InterlockedAnd_rel;
1814 case clang::ARM::BI_InterlockedAnd8_nf:
1815 case clang::ARM::BI_InterlockedAnd16_nf:
1816 case clang::ARM::BI_InterlockedAnd_nf:
1817 case clang::ARM::BI_InterlockedAnd64_nf:
1818 return MSVCIntrin::_InterlockedAnd_nf;
1819 case clang::ARM::BI_InterlockedIncrement16_acq:
1820 case clang::ARM::BI_InterlockedIncrement_acq:
1821 case clang::ARM::BI_InterlockedIncrement64_acq:
1822 return MSVCIntrin::_InterlockedIncrement_acq;
1823 case clang::ARM::BI_InterlockedIncrement16_rel:
1824 case clang::ARM::BI_InterlockedIncrement_rel:
1825 case clang::ARM::BI_InterlockedIncrement64_rel:
1826 return MSVCIntrin::_InterlockedIncrement_rel;
1827 case clang::ARM::BI_InterlockedIncrement16_nf:
1828 case clang::ARM::BI_InterlockedIncrement_nf:
1829 case clang::ARM::BI_InterlockedIncrement64_nf:
1830 return MSVCIntrin::_InterlockedIncrement_nf;
1831 case clang::ARM::BI_InterlockedDecrement16_acq:
1832 case clang::ARM::BI_InterlockedDecrement_acq:
1833 case clang::ARM::BI_InterlockedDecrement64_acq:
1834 return MSVCIntrin::_InterlockedDecrement_acq;
1835 case clang::ARM::BI_InterlockedDecrement16_rel:
1836 case clang::ARM::BI_InterlockedDecrement_rel:
1837 case clang::ARM::BI_InterlockedDecrement64_rel:
1838 return MSVCIntrin::_InterlockedDecrement_rel;
1839 case clang::ARM::BI_InterlockedDecrement16_nf:
1840 case clang::ARM::BI_InterlockedDecrement_nf:
1841 case clang::ARM::BI_InterlockedDecrement64_nf:
1842 return MSVCIntrin::_InterlockedDecrement_nf;
1843 }
1844 llvm_unreachable("must return from switch");
1845}
1846
1847static std::optional<CodeGenFunction::MSVCIntrin>
1848translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1849 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1850 switch (BuiltinID) {
1851 default:
1852 return std::nullopt;
1853 case clang::AArch64::BI_BitScanForward:
1854 case clang::AArch64::BI_BitScanForward64:
1855 return MSVCIntrin::_BitScanForward;
1856 case clang::AArch64::BI_BitScanReverse:
1857 case clang::AArch64::BI_BitScanReverse64:
1858 return MSVCIntrin::_BitScanReverse;
1859 case clang::AArch64::BI_InterlockedAnd64:
1860 return MSVCIntrin::_InterlockedAnd;
1861 case clang::AArch64::BI_InterlockedExchange64:
1862 return MSVCIntrin::_InterlockedExchange;
1863 case clang::AArch64::BI_InterlockedExchangeAdd64:
1864 return MSVCIntrin::_InterlockedExchangeAdd;
1865 case clang::AArch64::BI_InterlockedExchangeSub64:
1866 return MSVCIntrin::_InterlockedExchangeSub;
1867 case clang::AArch64::BI_InterlockedOr64:
1868 return MSVCIntrin::_InterlockedOr;
1869 case clang::AArch64::BI_InterlockedXor64:
1870 return MSVCIntrin::_InterlockedXor;
1871 case clang::AArch64::BI_InterlockedDecrement64:
1872 return MSVCIntrin::_InterlockedDecrement;
1873 case clang::AArch64::BI_InterlockedIncrement64:
1874 return MSVCIntrin::_InterlockedIncrement;
1875 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1876 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1877 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1878 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1879 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1880 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1881 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1882 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1883 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1884 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1885 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1886 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1887 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1888 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1889 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1890 case clang::AArch64::BI_InterlockedExchange8_acq:
1891 case clang::AArch64::BI_InterlockedExchange16_acq:
1892 case clang::AArch64::BI_InterlockedExchange_acq:
1893 case clang::AArch64::BI_InterlockedExchange64_acq:
1894 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1895 return MSVCIntrin::_InterlockedExchange_acq;
1896 case clang::AArch64::BI_InterlockedExchange8_rel:
1897 case clang::AArch64::BI_InterlockedExchange16_rel:
1898 case clang::AArch64::BI_InterlockedExchange_rel:
1899 case clang::AArch64::BI_InterlockedExchange64_rel:
1900 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1901 return MSVCIntrin::_InterlockedExchange_rel;
1902 case clang::AArch64::BI_InterlockedExchange8_nf:
1903 case clang::AArch64::BI_InterlockedExchange16_nf:
1904 case clang::AArch64::BI_InterlockedExchange_nf:
1905 case clang::AArch64::BI_InterlockedExchange64_nf:
1906 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1907 return MSVCIntrin::_InterlockedExchange_nf;
1908 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1909 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1910 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1911 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1912 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1913 return MSVCIntrin::_InterlockedCompareExchange_acq;
1914 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1915 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1916 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1917 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1918 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1919 return MSVCIntrin::_InterlockedCompareExchange_rel;
1920 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1921 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1922 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1923 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1924 return MSVCIntrin::_InterlockedCompareExchange_nf;
1925 case clang::AArch64::BI_InterlockedCompareExchange128:
1926 return MSVCIntrin::_InterlockedCompareExchange128;
1927 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1928 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1929 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1930 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1931 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1932 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1933 case clang::AArch64::BI_InterlockedOr8_acq:
1934 case clang::AArch64::BI_InterlockedOr16_acq:
1935 case clang::AArch64::BI_InterlockedOr_acq:
1936 case clang::AArch64::BI_InterlockedOr64_acq:
1937 return MSVCIntrin::_InterlockedOr_acq;
1938 case clang::AArch64::BI_InterlockedOr8_rel:
1939 case clang::AArch64::BI_InterlockedOr16_rel:
1940 case clang::AArch64::BI_InterlockedOr_rel:
1941 case clang::AArch64::BI_InterlockedOr64_rel:
1942 return MSVCIntrin::_InterlockedOr_rel;
1943 case clang::AArch64::BI_InterlockedOr8_nf:
1944 case clang::AArch64::BI_InterlockedOr16_nf:
1945 case clang::AArch64::BI_InterlockedOr_nf:
1946 case clang::AArch64::BI_InterlockedOr64_nf:
1947 return MSVCIntrin::_InterlockedOr_nf;
1948 case clang::AArch64::BI_InterlockedXor8_acq:
1949 case clang::AArch64::BI_InterlockedXor16_acq:
1950 case clang::AArch64::BI_InterlockedXor_acq:
1951 case clang::AArch64::BI_InterlockedXor64_acq:
1952 return MSVCIntrin::_InterlockedXor_acq;
1953 case clang::AArch64::BI_InterlockedXor8_rel:
1954 case clang::AArch64::BI_InterlockedXor16_rel:
1955 case clang::AArch64::BI_InterlockedXor_rel:
1956 case clang::AArch64::BI_InterlockedXor64_rel:
1957 return MSVCIntrin::_InterlockedXor_rel;
1958 case clang::AArch64::BI_InterlockedXor8_nf:
1959 case clang::AArch64::BI_InterlockedXor16_nf:
1960 case clang::AArch64::BI_InterlockedXor_nf:
1961 case clang::AArch64::BI_InterlockedXor64_nf:
1962 return MSVCIntrin::_InterlockedXor_nf;
1963 case clang::AArch64::BI_InterlockedAnd8_acq:
1964 case clang::AArch64::BI_InterlockedAnd16_acq:
1965 case clang::AArch64::BI_InterlockedAnd_acq:
1966 case clang::AArch64::BI_InterlockedAnd64_acq:
1967 return MSVCIntrin::_InterlockedAnd_acq;
1968 case clang::AArch64::BI_InterlockedAnd8_rel:
1969 case clang::AArch64::BI_InterlockedAnd16_rel:
1970 case clang::AArch64::BI_InterlockedAnd_rel:
1971 case clang::AArch64::BI_InterlockedAnd64_rel:
1972 return MSVCIntrin::_InterlockedAnd_rel;
1973 case clang::AArch64::BI_InterlockedAnd8_nf:
1974 case clang::AArch64::BI_InterlockedAnd16_nf:
1975 case clang::AArch64::BI_InterlockedAnd_nf:
1976 case clang::AArch64::BI_InterlockedAnd64_nf:
1977 return MSVCIntrin::_InterlockedAnd_nf;
1978 case clang::AArch64::BI_InterlockedIncrement16_acq:
1979 case clang::AArch64::BI_InterlockedIncrement_acq:
1980 case clang::AArch64::BI_InterlockedIncrement64_acq:
1981 return MSVCIntrin::_InterlockedIncrement_acq;
1982 case clang::AArch64::BI_InterlockedIncrement16_rel:
1983 case clang::AArch64::BI_InterlockedIncrement_rel:
1984 case clang::AArch64::BI_InterlockedIncrement64_rel:
1985 return MSVCIntrin::_InterlockedIncrement_rel;
1986 case clang::AArch64::BI_InterlockedIncrement16_nf:
1987 case clang::AArch64::BI_InterlockedIncrement_nf:
1988 case clang::AArch64::BI_InterlockedIncrement64_nf:
1989 return MSVCIntrin::_InterlockedIncrement_nf;
1990 case clang::AArch64::BI_InterlockedDecrement16_acq:
1991 case clang::AArch64::BI_InterlockedDecrement_acq:
1992 case clang::AArch64::BI_InterlockedDecrement64_acq:
1993 return MSVCIntrin::_InterlockedDecrement_acq;
1994 case clang::AArch64::BI_InterlockedDecrement16_rel:
1995 case clang::AArch64::BI_InterlockedDecrement_rel:
1996 case clang::AArch64::BI_InterlockedDecrement64_rel:
1997 return MSVCIntrin::_InterlockedDecrement_rel;
1998 case clang::AArch64::BI_InterlockedDecrement16_nf:
1999 case clang::AArch64::BI_InterlockedDecrement_nf:
2000 case clang::AArch64::BI_InterlockedDecrement64_nf:
2001 return MSVCIntrin::_InterlockedDecrement_nf;
2002 }
2003 llvm_unreachable("must return from switch");
2004}
2005
2006static std::optional<CodeGenFunction::MSVCIntrin>
2007translateX86ToMsvcIntrin(unsigned BuiltinID) {
2008 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
2009 switch (BuiltinID) {
2010 default:
2011 return std::nullopt;
2012 case clang::X86::BI_BitScanForward:
2013 case clang::X86::BI_BitScanForward64:
2014 return MSVCIntrin::_BitScanForward;
2015 case clang::X86::BI_BitScanReverse:
2016 case clang::X86::BI_BitScanReverse64:
2017 return MSVCIntrin::_BitScanReverse;
2018 case clang::X86::BI_InterlockedAnd64:
2019 return MSVCIntrin::_InterlockedAnd;
2020 case clang::X86::BI_InterlockedCompareExchange128:
2021 return MSVCIntrin::_InterlockedCompareExchange128;
2022 case clang::X86::BI_InterlockedExchange64:
2023 return MSVCIntrin::_InterlockedExchange;
2024 case clang::X86::BI_InterlockedExchangeAdd64:
2025 return MSVCIntrin::_InterlockedExchangeAdd;
2026 case clang::X86::BI_InterlockedExchangeSub64:
2027 return MSVCIntrin::_InterlockedExchangeSub;
2028 case clang::X86::BI_InterlockedOr64:
2029 return MSVCIntrin::_InterlockedOr;
2030 case clang::X86::BI_InterlockedXor64:
2031 return MSVCIntrin::_InterlockedXor;
2032 case clang::X86::BI_InterlockedDecrement64:
2033 return MSVCIntrin::_InterlockedDecrement;
2034 case clang::X86::BI_InterlockedIncrement64:
2035 return MSVCIntrin::_InterlockedIncrement;
2036 }
2037 llvm_unreachable("must return from switch");
2038}
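// Illustrative sketch, not part of this excerpt: how a target-specific emitter
// elsewhere in this file would typically consume the translate* helpers above
// before falling back to target-only handling (shown only as a commented
// model, since the real call sites are outside this excerpt).
//
//   if (std::optional<CodeGenFunction::MSVCIntrin> MsvcIntId =
//           translateX86ToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);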
2039
2040// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2041Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2042 const CallExpr *E) {
2043 switch (BuiltinID) {
2044 case MSVCIntrin::_BitScanForward:
2045 case MSVCIntrin::_BitScanReverse: {
2046 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2047 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2048
2049 llvm::Type *ArgType = ArgValue->getType();
2050 llvm::Type *IndexType = IndexAddress.getElementType();
2051 llvm::Type *ResultType = ConvertType(E->getType());
2052
2053 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2054 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2055 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2056
2057 BasicBlock *Begin = Builder.GetInsertBlock();
2058 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2059 Builder.SetInsertPoint(End);
2060 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2061
2062 Builder.SetInsertPoint(Begin);
2063 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2064 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2065 Builder.CreateCondBr(IsZero, End, NotZero);
2066 Result->addIncoming(ResZero, Begin);
2067
2068 Builder.SetInsertPoint(NotZero);
2069
2070 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2071 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2072 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2073 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2074 Builder.CreateStore(ZeroCount, IndexAddress, false);
2075 } else {
2076 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2077 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2078
2079 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2080 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2081 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2082 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2083 Builder.CreateStore(Index, IndexAddress, false);
2084 }
2085 Builder.CreateBr(End);
2086 Result->addIncoming(ResOne, NotZero);
2087
2088 Builder.SetInsertPoint(End);
2089 return Result;
2090 }
2091 case MSVCIntrin::_InterlockedAnd:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2093 case MSVCIntrin::_InterlockedExchange:
2094 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2095 case MSVCIntrin::_InterlockedExchangeAdd:
2096 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2097 case MSVCIntrin::_InterlockedExchangeSub:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2099 case MSVCIntrin::_InterlockedOr:
2100 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2101 case MSVCIntrin::_InterlockedXor:
2102 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2103 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2104 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2105 AtomicOrdering::Acquire);
2106 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2107 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2108 AtomicOrdering::Release);
2109 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2110 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2111 AtomicOrdering::Monotonic);
2112 case MSVCIntrin::_InterlockedExchange_acq:
2113 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2114 AtomicOrdering::Acquire);
2115 case MSVCIntrin::_InterlockedExchange_rel:
2116 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2117 AtomicOrdering::Release);
2118 case MSVCIntrin::_InterlockedExchange_nf:
2119 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2120 AtomicOrdering::Monotonic);
2121 case MSVCIntrin::_InterlockedCompareExchange:
2122 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2123 case MSVCIntrin::_InterlockedCompareExchange_acq:
2124 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2125 case MSVCIntrin::_InterlockedCompareExchange_rel:
2126 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2127 case MSVCIntrin::_InterlockedCompareExchange_nf:
2128 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2129 case MSVCIntrin::_InterlockedCompareExchange128:
2130 return EmitAtomicCmpXchg128ForMSIntrin(
2131 *this, E, AtomicOrdering::SequentiallyConsistent);
2132 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2133 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2134 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2135 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2136 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2137 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2138 case MSVCIntrin::_InterlockedOr_acq:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2140 AtomicOrdering::Acquire);
2141 case MSVCIntrin::_InterlockedOr_rel:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2143 AtomicOrdering::Release);
2144 case MSVCIntrin::_InterlockedOr_nf:
2145 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2146 AtomicOrdering::Monotonic);
2147 case MSVCIntrin::_InterlockedXor_acq:
2148 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2149 AtomicOrdering::Acquire);
2150 case MSVCIntrin::_InterlockedXor_rel:
2151 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2152 AtomicOrdering::Release);
2153 case MSVCIntrin::_InterlockedXor_nf:
2154 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2155 AtomicOrdering::Monotonic);
2156 case MSVCIntrin::_InterlockedAnd_acq:
2157 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2158 AtomicOrdering::Acquire);
2159 case MSVCIntrin::_InterlockedAnd_rel:
2160 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2161 AtomicOrdering::Release);
2162 case MSVCIntrin::_InterlockedAnd_nf:
2163 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2164 AtomicOrdering::Monotonic);
2165 case MSVCIntrin::_InterlockedIncrement_acq:
2166 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2167 case MSVCIntrin::_InterlockedIncrement_rel:
2168 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2169 case MSVCIntrin::_InterlockedIncrement_nf:
2170 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2171 case MSVCIntrin::_InterlockedDecrement_acq:
2172 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2173 case MSVCIntrin::_InterlockedDecrement_rel:
2174 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2175 case MSVCIntrin::_InterlockedDecrement_nf:
2176 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2177
2178 case MSVCIntrin::_InterlockedDecrement:
2179 return EmitAtomicDecrementValue(*this, E);
2180 case MSVCIntrin::_InterlockedIncrement:
2181 return EmitAtomicIncrementValue(*this, E);
2182
2183 case MSVCIntrin::__fastfail: {
2184 // Request immediate process termination from the kernel. The instruction
2185 // sequences to do this are documented on MSDN:
2186 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2187 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2188 StringRef Asm, Constraints;
2189 switch (ISA) {
2190 default:
2191 ErrorUnsupported(E, "__fastfail call for this architecture");
2192 break;
2193 case llvm::Triple::x86:
2194 case llvm::Triple::x86_64:
2195 Asm = "int $$0x29";
2196 Constraints = "{cx}";
2197 break;
2198 case llvm::Triple::thumb:
2199 Asm = "udf #251";
2200 Constraints = "{r0}";
2201 break;
2202 case llvm::Triple::aarch64:
2203 Asm = "brk #0xF003";
2204 Constraints = "{w0}";
2205 }
2206 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2207 llvm::InlineAsm *IA =
2208 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2209 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2210 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2211 llvm::Attribute::NoReturn);
2212 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2213 CI->setAttributes(NoReturnAttr);
2214 return CI;
2215 }
2216 }
2217 llvm_unreachable("Incorrect MSVC intrinsic!");
2218}
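// Illustrative sketch, not part of CGBuiltin.cpp: a C model of the
// _BitScanForward lowering above (a zero input takes the ResZero PHI arm;
// otherwise cttz feeds the stored index). The helper name is hypothetical.
static unsigned char sketch_BitScanForward(unsigned long *Index,
                                           unsigned int Mask) {
  if (Mask == 0)
    return 0;                                  // ResZero from the entry block
  *Index = (unsigned long)__builtin_ctz(Mask); // cttz, zero treated as poison
  return 1;                                    // ResOne from bitscan_not_zero
}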
2219
2220namespace {
2221// ARC cleanup for __builtin_os_log_format
2222struct CallObjCArcUse final : EHScopeStack::Cleanup {
2223 CallObjCArcUse(llvm::Value *object) : object(object) {}
2224 llvm::Value *object;
2225
2226 void Emit(CodeGenFunction &CGF, Flags flags) override {
2227 CGF.EmitARCIntrinsicUse(object);
2228 }
2229};
2230}
2231
2233 BuiltinCheckKind Kind) {
2234 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2235 "Unsupported builtin check kind");
2236
2237 Value *ArgValue = EmitScalarExpr(E);
2238 if (!SanOpts.has(SanitizerKind::Builtin))
2239 return ArgValue;
2240
2241 SanitizerScope SanScope(this);
2242 Value *Cond = Builder.CreateICmpNE(
2243 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2244 EmitCheck(std::make_pair(Cond, SanitizerKind::SO_Builtin),
2245 SanitizerHandler::InvalidBuiltin,
2247 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2248 {});
2249 return ArgValue;
2250}
2251
2253 Value *ArgValue = EvaluateExprAsBool(E);
2254 if (!SanOpts.has(SanitizerKind::Builtin))
2255 return ArgValue;
2256
2257 SanitizerScope SanScope(this);
2258 EmitCheck(
2259 std::make_pair(ArgValue, SanitizerKind::SO_Builtin),
2260 SanitizerHandler::InvalidBuiltin,
2262 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2263 std::nullopt);
2264 return ArgValue;
2265}
2266
2267static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2268 return CGF.Builder.CreateBinaryIntrinsic(
2269 Intrinsic::abs, ArgValue,
2270 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2271}
2272
2274 bool SanitizeOverflow) {
2275 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2276
2277 // Try to eliminate overflow check.
2278 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2279 if (!VCI->isMinSignedValue())
2280 return EmitAbs(CGF, ArgValue, true);
2281 }
2282
2283 CodeGenFunction::SanitizerScope SanScope(&CGF);
2284
2285 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2286 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2287 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2288 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2289 Value *NotOverflow = CGF.Builder.CreateNot(
2290 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2291
2292 // TODO: support -ftrapv-handler.
2293 if (SanitizeOverflow) {
2294 CGF.EmitCheck({{NotOverflow, SanitizerKind::SO_SignedIntegerOverflow}},
2295 SanitizerHandler::NegateOverflow,
2296 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2298 {ArgValue});
2299 } else
2300 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2301
2302 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2303 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2304}
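// Illustrative sketch, not part of CGBuiltin.cpp: the overflow-checked abs
// emitted above, expressed with the corresponding C builtin. The trap stands
// in for the sanitizer/trap check; the helper name is hypothetical.
static int sketch_checked_abs(int X) {
  int Negated;
  if (__builtin_sub_overflow(0, X, &Negated)) // ssub.with.overflow(0, X)
    __builtin_trap();                         // NegateOverflow / SubOverflow
  return X < 0 ? Negated : X;                 // select on "abscond"
}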
2305
2306/// Get the argument type for arguments to os_log_helper.
2308 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2309 return C.getCanonicalType(UnsignedTy);
2310}
2311
2314 CharUnits BufferAlignment) {
2315 ASTContext &Ctx = getContext();
2316
2318 {
2319 raw_svector_ostream OS(Name);
2320 OS << "__os_log_helper";
2321 OS << "_" << BufferAlignment.getQuantity();
2322 OS << "_" << int(Layout.getSummaryByte());
2323 OS << "_" << int(Layout.getNumArgsByte());
2324 for (const auto &Item : Layout.Items)
2325 OS << "_" << int(Item.getSizeByte()) << "_"
2326 << int(Item.getDescriptorByte());
2327 }
2328
2329 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2330 return F;
2331
2333 FunctionArgList Args;
2334 Args.push_back(ImplicitParamDecl::Create(
2335 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2337 ArgTys.emplace_back(Ctx.VoidPtrTy);
2338
2339 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2340 char Size = Layout.Items[I].getSizeByte();
2341 if (!Size)
2342 continue;
2343
2344 QualType ArgTy = getOSLogArgType(Ctx, Size);
2345 Args.push_back(ImplicitParamDecl::Create(
2346 Ctx, nullptr, SourceLocation(),
2347 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2349 ArgTys.emplace_back(ArgTy);
2350 }
2351
2352 QualType ReturnTy = Ctx.VoidTy;
2353
2354 // The helper function has linkonce_odr linkage to enable the linker to merge
2355 // identical functions. To ensure the merging always happens, 'noinline' is
2356 // attached to the function when compiling with -Oz.
2357 const CGFunctionInfo &FI =
2359 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2360 llvm::Function *Fn = llvm::Function::Create(
2361 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2362 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2363 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2365 Fn->setDoesNotThrow();
2366
2367 // Attach 'noinline' at -Oz.
2368 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2369 Fn->addFnAttr(llvm::Attribute::NoInline);
2370
2371 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2372 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2373
2374 // Create a scope with an artificial location for the body of this function.
2375 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2376
2377 CharUnits Offset;
2379 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2380 BufferAlignment);
2381 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2382 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2383 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2384 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2385
2386 unsigned I = 1;
2387 for (const auto &Item : Layout.Items) {
2389 Builder.getInt8(Item.getDescriptorByte()),
2390 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2392 Builder.getInt8(Item.getSizeByte()),
2393 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2394
2395 CharUnits Size = Item.size();
2396 if (!Size.getQuantity())
2397 continue;
2398
2399 Address Arg = GetAddrOfLocalVar(Args[I]);
2400 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2401 Addr = Addr.withElementType(Arg.getElementType());
2403 Offset += Size;
2404 ++I;
2405 }
2406
2408
2409 return Fn;
2410}
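// Illustrative sketch, not part of CGBuiltin.cpp: the byte layout the generated
// __os_log_helper_* function writes, inferred from the stores above. Offsets
// and the struct name below are a model only.
//
//   [0]            summary byte
//   [1]            number-of-args byte
//   then per item: descriptor byte, size byte, `size` bytes of argument data
struct SketchOSLogItemHeader {  // hypothetical; mirrors argDescriptor/argSize
  unsigned char Descriptor;
  unsigned char Size;           // followed by Size bytes of argument data
};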
2411
2413 assert(E.getNumArgs() >= 2 &&
2414 "__builtin_os_log_format takes at least 2 arguments");
2415 ASTContext &Ctx = getContext();
2418 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2419 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2420
2421 // Ignore argument 1, the format string. It is not currently used.
2422 CallArgList Args;
2423 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2424
2425 for (const auto &Item : Layout.Items) {
2426 int Size = Item.getSizeByte();
2427 if (!Size)
2428 continue;
2429
2430 llvm::Value *ArgVal;
2431
2432 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2433 uint64_t Val = 0;
2434 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2435 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2436 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2437 } else if (const Expr *TheExpr = Item.getExpr()) {
2438 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2439
2440 // If a temporary object that requires destruction after the full
2441 // expression is passed, push a lifetime-extended cleanup to extend its
2442 // lifetime to the end of the enclosing block scope.
2443 auto LifetimeExtendObject = [&](const Expr *E) {
2444 E = E->IgnoreParenCasts();
2445 // Extend lifetimes of objects returned by function calls and message
2446 // sends.
2447
2448 // FIXME: We should do this in other cases in which temporaries are
2449 // created including arguments of non-ARC types (e.g., C++
2450 // temporaries).
2451 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2452 return true;
2453 return false;
2454 };
2455
2456 if (TheExpr->getType()->isObjCRetainableType() &&
2457 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2458 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2459 "Only scalar can be a ObjC retainable type");
2460 if (!isa<Constant>(ArgVal)) {
2461 CleanupKind Cleanup = getARCCleanupKind();
2462 QualType Ty = TheExpr->getType();
2464 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2465 ArgVal = EmitARCRetain(Ty, ArgVal);
2466 Builder.CreateStore(ArgVal, Addr);
2467 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2469 Cleanup & EHCleanup);
2470
2471 // Push a clang.arc.use call to ensure the ARC optimizer knows that the
2472 // argument has to stay alive.
2473 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2474 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2475 }
2476 }
2477 } else {
2478 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2479 }
2480
2481 unsigned ArgValSize =
2482 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2483 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2484 ArgValSize);
2485 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2486 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2487 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2488 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2489 Args.add(RValue::get(ArgVal), ArgTy);
2490 }
2491
2492 const CGFunctionInfo &FI =
2495 Layout, BufAddr.getAlignment());
2497 return RValue::get(BufAddr, *this);
2498}
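// Illustrative sketch, not part of CGBuiltin.cpp: a typical use of the builtin
// handled above. The buffer is sized with __builtin_os_log_format_buffer_size;
// the format string itself is ignored by the codegen above and only the layout
// items matter. The function name is hypothetical.
void sketch_os_log_use(void) {
  char Buf[__builtin_os_log_format_buffer_size("%d %s", 42, "hi")];
  __builtin_os_log_format(Buf, "%d %s", 42, "hi");
}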
2499
2501 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2502 WidthAndSignedness ResultInfo) {
2503 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2504 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2505 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2506}
2507
2509 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2510 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2511 const clang::Expr *ResultArg, QualType ResultQTy,
2512 WidthAndSignedness ResultInfo) {
2514 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2515 "Cannot specialize this multiply");
2516
2517 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2518 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2519
2520 llvm::Value *HasOverflow;
2521 llvm::Value *Result = EmitOverflowIntrinsic(
2522 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2523
2524 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2525 // however, since the original builtin had a signed result, we need to report
2526 // an overflow when the result is greater than INT_MAX.
2527 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2528 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2529
2530 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2531 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2532
2533 bool isVolatile =
2534 ResultArg->getType()->getPointeeType().isVolatileQualified();
2535 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2536 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2537 isVolatile);
2538 return RValue::get(HasOverflow);
2539}
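// Illustrative sketch, not part of CGBuiltin.cpp: a C model of the
// specialization above (unsigned * unsigned with a same-width signed result).
// Overflow is the unsigned-multiply overflow OR the product exceeding INT_MAX.
// The helper name is hypothetical; 0x7FFFFFFF stands in for a 32-bit INT_MAX.
static _Bool sketch_umul_to_signed(unsigned A, unsigned B, int *Res) {
  unsigned Prod;
  _Bool Ovf = __builtin_mul_overflow(A, B, &Prod); // umul.with.overflow
  Ovf |= Prod > 0x7FFFFFFFu;                       // the IntMaxOverflow check
  *Res = (int)Prod;
  return Ovf;
}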
2540
2541/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2542static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2543 WidthAndSignedness Op1Info,
2544 WidthAndSignedness Op2Info,
2545 WidthAndSignedness ResultInfo) {
2546 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2547 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2548 Op1Info.Signed != Op2Info.Signed;
2549}
2550
2551/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2552/// the generic checked-binop irgen.
2553static RValue
2555 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2556 WidthAndSignedness Op2Info,
2557 const clang::Expr *ResultArg, QualType ResultQTy,
2558 WidthAndSignedness ResultInfo) {
2559 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2560 Op2Info, ResultInfo) &&
2561 "Not a mixed-sign multipliction we can specialize");
2562
2563 // Emit the signed and unsigned operands.
2564 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2565 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2566 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2567 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2568 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2569 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2570
2571 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2572 if (SignedOpWidth < UnsignedOpWidth)
2573 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2574 if (UnsignedOpWidth < SignedOpWidth)
2575 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2576
2577 llvm::Type *OpTy = Signed->getType();
2578 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2579 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2580 llvm::Type *ResTy = ResultPtr.getElementType();
2581 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2582
2583 // Take the absolute value of the signed operand.
2584 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2585 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2586 llvm::Value *AbsSigned =
2587 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2588
2589 // Perform a checked unsigned multiplication.
2590 llvm::Value *UnsignedOverflow;
2591 llvm::Value *UnsignedResult =
2592 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2593 Unsigned, UnsignedOverflow);
2594
2595 llvm::Value *Overflow, *Result;
2596 if (ResultInfo.Signed) {
2597 // Signed overflow occurs if the result is greater than INT_MAX or less
2598 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2599 auto IntMax =
2600 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2601 llvm::Value *MaxResult =
2602 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2603 CGF.Builder.CreateZExt(IsNegative, OpTy));
2604 llvm::Value *SignedOverflow =
2605 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2606 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2607
2608 // Prepare the signed result (possibly by negating it).
2609 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2610 llvm::Value *SignedResult =
2611 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2612 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2613 } else {
2614 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2615 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2616 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2617 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2618 if (ResultInfo.Width < OpWidth) {
2619 auto IntMax =
2620 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2621 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2622 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2623 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2624 }
2625
2626 // Negate the product if it would be negative in infinite precision.
2627 Result = CGF.Builder.CreateSelect(
2628 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2629
2630 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2631 }
2632 assert(Overflow && Result && "Missing overflow or result");
2633
2634 bool isVolatile =
2635 ResultArg->getType()->getPointeeType().isVolatileQualified();
2636 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2637 isVolatile);
2638 return RValue::get(Overflow);
2639}
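// Illustrative sketch, not part of CGBuiltin.cpp: a C model of the mixed-sign
// path above for int * unsigned -> int with equal widths. The helper name is
// hypothetical; 0x7FFFFFFF stands in for a 32-bit INT_MAX.
static _Bool sketch_mixed_sign_mul(int S, unsigned U, int *Res) {
  _Bool IsNegative = S < 0;
  unsigned AbsSigned = IsNegative ? 0u - (unsigned)S : (unsigned)S;
  unsigned Prod;
  _Bool Overflow = __builtin_mul_overflow(AbsSigned, U, &Prod); // unsigned mul
  Overflow |= Prod > 0x7FFFFFFFu + (IsNegative ? 1u : 0u);      // MaxResult
  *Res = (int)(IsNegative ? 0u - Prod : Prod);                  // sign fixup
  return Overflow;
}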
2640
2641static bool
2643 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2644 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2645 Ty = Ctx.getBaseElementType(Arr);
2646
2647 const auto *Record = Ty->getAsCXXRecordDecl();
2648 if (!Record)
2649 return false;
2650
2651 // We've already checked this type, or are in the process of checking it.
2652 if (!Seen.insert(Record).second)
2653 return false;
2654
2655 assert(Record->hasDefinition() &&
2656 "Incomplete types should already be diagnosed");
2657
2658 if (Record->isDynamicClass())
2659 return true;
2660
2661 for (FieldDecl *F : Record->fields()) {
2662 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2663 return true;
2664 }
2665 return false;
2666}
2667
2668/// Determine if the specified type requires laundering by checking if it is a
2669/// dynamic class type or contains a subobject which is a dynamic class type.
2671 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2672 return false;
2674 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2675}
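// Illustrative examples, not part of CGBuiltin.cpp: under
// -fstrict-vtable-pointers the predicate above is true for a dynamic class and
// for anything that (possibly through arrays) contains one. The type names are
// hypothetical.
struct SketchDynamic { virtual void f(); };  // dynamic class: needs laundering
struct SketchHolder { SketchDynamic D[2]; }; // contains one: needs laundering
struct SketchPlain { int X; };               // neither: no laundering needed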
2676
2677RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2678 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2679 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2680
2681 // The builtin's shift arg may have a different type than the source arg and
2682 // result, but the LLVM intrinsic uses the same type for all values.
2683 llvm::Type *Ty = Src->getType();
2684 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2685
2686 // Rotate is a special case of an LLVM funnel shift: the first two arguments are the same.
2687 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2688 Function *F = CGM.getIntrinsic(IID, Ty);
2689 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2690}
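// Illustrative sketch, not part of CGBuiltin.cpp: rotate-left as emitted above
// is fshl(Src, Src, Amt), a funnel shift whose two value operands are the
// same. A C model for 32 bits (hypothetical helper name):
static unsigned sketch_rotl32(unsigned X, unsigned Amt) {
  Amt &= 31;                                       // fshl masks the amount
  return Amt ? (X << Amt) | (X >> (32 - Amt)) : X; // (x, x) funnel shift
}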
2691
2692 // Map long-double math builtins to their f128 versions.
2693static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2694 switch (BuiltinID) {
2695#define MUTATE_LDBL(func) \
2696 case Builtin::BI__builtin_##func##l: \
2697 return Builtin::BI__builtin_##func##f128;
2728 MUTATE_LDBL(nans)
2729 MUTATE_LDBL(inf)
2748 MUTATE_LDBL(huge_val)
2758#undef MUTATE_LDBL
2759 default:
2760 return BuiltinID;
2761 }
2762}
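// Illustrative check, not part of CGBuiltin.cpp: on a PPC64 target whose long
// double is IEEE quad, MUTATE_LDBL(inf) maps the long-double builtin to its
// f128 counterpart before the normal emission logic runs.
//
//   assert(mutateLongDoubleBuiltin(Builtin::BI__builtin_infl) ==
//          Builtin::BI__builtin_inff128);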
2763
2764static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2765 Value *V) {
2766 if (CGF.Builder.getIsFPConstrained() &&
2767 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2768 if (Value *Result =
2769 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2770 return Result;
2771 }
2772 return nullptr;
2773}
2774
2776 const FunctionDecl *FD) {
2777 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2778 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2779 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2780
2782 for (auto &&FormalTy : FnTy->params())
2783 Args.push_back(llvm::PoisonValue::get(FormalTy));
2784
2785 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2786}
2787
2788RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2789 const CallExpr *E,
2790 ReturnValueSlot ReturnValue) {
2791 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2792 "Should not codegen for consteval builtins");
2793
2794 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2795 // See if we can constant fold this builtin. If so, don't emit it at all.
2796 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2799 !Result.hasSideEffects()) {
2800 if (Result.Val.isInt())
2801 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2802 Result.Val.getInt()));
2803 if (Result.Val.isFloat())
2804 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2805 Result.Val.getFloat()));
2806 }
2807
2808 // If the current long-double semantics are IEEE 128-bit, replace long-double
2809 // math builtins with their f128 equivalents.
2810 // TODO: This mutation should also be applied to targets other than PPC once
2811 // their backends support IEEE 128-bit style libcalls.
2812 if (getTarget().getTriple().isPPC64() &&
2813 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2814 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2815
2816 // If the builtin has been declared explicitly with an assembler label,
2817 // disable the specialized emitting below. Ideally we should communicate the
2818 // rename in IR, or at least avoid generating the intrinsic calls that are
2819 // likely to get lowered to the renamed library functions.
2820 const unsigned BuiltinIDIfNoAsmLabel =
2821 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2822
2823 std::optional<bool> ErrnoOverriden;
2824 // ErrnoOverriden is true if math-errno is overridden via the
2825 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2826 // which implies math-errno.
2827 if (E->hasStoredFPFeatures()) {
2828 FPOptionsOverride OP = E->getFPFeatures();
2829 if (OP.hasMathErrnoOverride())
2830 ErrnoOverriden = OP.getMathErrnoOverride();
2831 }
2832 // True if '__attribute__((optnone))' is used. This attribute overrides
2833 // fast-math, which implies math-errno.
2834 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2835
2836 // True if we are compiling with optimization and errno has been disabled
2837 // via '#pragma float_control(precise, off)', and
2838 // '__attribute__((optnone))' hasn't been seen.
2839 bool ErrnoOverridenToFalseWithOpt =
2840 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2841 CGM.getCodeGenOpts().OptimizationLevel != 0;
2842
2843 // There are LLVM math intrinsics/instructions corresponding to math library
2844 // functions, except that the LLVM op never sets errno while the math library
2845 // might. Also, math builtins have the same semantics as their math library
2846 // twins. Thus, we can transform math library and builtin calls to their
2847 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2848 // If FP exceptions are enabled, the experimental versions of the intrinsics
2849 // model them.
2850 bool ConstAlways =
2851 getContext().BuiltinInfo.isConst(BuiltinID);
2852
2853 // There's a special case with the fma builtins where they are always const
2854 // if the target environment is GNU or the target OS is Windows and we're
2855 // targeting the MSVCRT.dll environment.
2856 // FIXME: This list can become outdated. Need to find a way to get it in some
2857 // other way.
2858 switch (BuiltinID) {
2859 case Builtin::BI__builtin_fma:
2860 case Builtin::BI__builtin_fmaf:
2861 case Builtin::BI__builtin_fmal:
2862 case Builtin::BI__builtin_fmaf16:
2863 case Builtin::BIfma:
2864 case Builtin::BIfmaf:
2865 case Builtin::BIfmal: {
2866 auto &Trip = CGM.getTriple();
2867 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2868 ConstAlways = true;
2869 break;
2870 }
2871 default:
2872 break;
2873 }
2874
2875 bool ConstWithoutErrnoAndExceptions =
2877 bool ConstWithoutExceptions =
2879
2880 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2881 // disabled.
2882 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2883 // or attributes that affect math-errno should prevent or allow math
2884 // intrinsics to be generated. Intrinsics are generated:
2885 // 1- In fast-math mode, unless math-errno is overridden
2886 //    via '#pragma float_control(precise, on)', or via
2887 //    '__attribute__((optnone))'.
2888 // 2- If math-errno was enabled on the command line but overridden
2889 //    to false via '#pragma float_control(precise, off)' and
2890 //    '__attribute__((optnone))' hasn't been used.
2891 // 3- If we are compiling with optimization and errno has been disabled
2892 //    via '#pragma float_control(precise, off)', and
2893 //    '__attribute__((optnone))' hasn't been used.
2894
2895 bool ConstWithoutErrnoOrExceptions =
2896 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2897 bool GenerateIntrinsics =
2898 (ConstAlways && !OptNone) ||
2899 (!getLangOpts().MathErrno &&
2900 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2901 if (!GenerateIntrinsics) {
2902 GenerateIntrinsics =
2903 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2904 if (!GenerateIntrinsics)
2905 GenerateIntrinsics =
2906 ConstWithoutErrnoOrExceptions &&
2907 (!getLangOpts().MathErrno &&
2908 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2909 if (!GenerateIntrinsics)
2910 GenerateIntrinsics =
2911 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2912 }
2913 if (GenerateIntrinsics) {
2914 switch (BuiltinIDIfNoAsmLabel) {
2915 case Builtin::BIacos:
2916 case Builtin::BIacosf:
2917 case Builtin::BIacosl:
2918 case Builtin::BI__builtin_acos:
2919 case Builtin::BI__builtin_acosf:
2920 case Builtin::BI__builtin_acosf16:
2921 case Builtin::BI__builtin_acosl:
2922 case Builtin::BI__builtin_acosf128:
2924 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2925
2926 case Builtin::BIasin:
2927 case Builtin::BIasinf:
2928 case Builtin::BIasinl:
2929 case Builtin::BI__builtin_asin:
2930 case Builtin::BI__builtin_asinf:
2931 case Builtin::BI__builtin_asinf16:
2932 case Builtin::BI__builtin_asinl:
2933 case Builtin::BI__builtin_asinf128:
2935 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2936
2937 case Builtin::BIatan:
2938 case Builtin::BIatanf:
2939 case Builtin::BIatanl:
2940 case Builtin::BI__builtin_atan:
2941 case Builtin::BI__builtin_atanf:
2942 case Builtin::BI__builtin_atanf16:
2943 case Builtin::BI__builtin_atanl:
2944 case Builtin::BI__builtin_atanf128:
2946 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2947
2948 case Builtin::BIatan2:
2949 case Builtin::BIatan2f:
2950 case Builtin::BIatan2l:
2951 case Builtin::BI__builtin_atan2:
2952 case Builtin::BI__builtin_atan2f:
2953 case Builtin::BI__builtin_atan2f16:
2954 case Builtin::BI__builtin_atan2l:
2955 case Builtin::BI__builtin_atan2f128:
2957 *this, E, Intrinsic::atan2,
2958 Intrinsic::experimental_constrained_atan2));
2959
2960 case Builtin::BIceil:
2961 case Builtin::BIceilf:
2962 case Builtin::BIceill:
2963 case Builtin::BI__builtin_ceil:
2964 case Builtin::BI__builtin_ceilf:
2965 case Builtin::BI__builtin_ceilf16:
2966 case Builtin::BI__builtin_ceill:
2967 case Builtin::BI__builtin_ceilf128:
2969 Intrinsic::ceil,
2970 Intrinsic::experimental_constrained_ceil));
2971
2972 case Builtin::BIcopysign:
2973 case Builtin::BIcopysignf:
2974 case Builtin::BIcopysignl:
2975 case Builtin::BI__builtin_copysign:
2976 case Builtin::BI__builtin_copysignf:
2977 case Builtin::BI__builtin_copysignf16:
2978 case Builtin::BI__builtin_copysignl:
2979 case Builtin::BI__builtin_copysignf128:
2980 return RValue::get(
2981 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2982
2983 case Builtin::BIcos:
2984 case Builtin::BIcosf:
2985 case Builtin::BIcosl:
2986 case Builtin::BI__builtin_cos:
2987 case Builtin::BI__builtin_cosf:
2988 case Builtin::BI__builtin_cosf16:
2989 case Builtin::BI__builtin_cosl:
2990 case Builtin::BI__builtin_cosf128:
2992 Intrinsic::cos,
2993 Intrinsic::experimental_constrained_cos));
2994
2995 case Builtin::BIcosh:
2996 case Builtin::BIcoshf:
2997 case Builtin::BIcoshl:
2998 case Builtin::BI__builtin_cosh:
2999 case Builtin::BI__builtin_coshf:
3000 case Builtin::BI__builtin_coshf16:
3001 case Builtin::BI__builtin_coshl:
3002 case Builtin::BI__builtin_coshf128:
3003 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3004 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3005
3006 case Builtin::BIexp:
3007 case Builtin::BIexpf:
3008 case Builtin::BIexpl:
3009 case Builtin::BI__builtin_exp:
3010 case Builtin::BI__builtin_expf:
3011 case Builtin::BI__builtin_expf16:
3012 case Builtin::BI__builtin_expl:
3013 case Builtin::BI__builtin_expf128:
3014 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3015 Intrinsic::exp,
3016 Intrinsic::experimental_constrained_exp));
3017
3018 case Builtin::BIexp2:
3019 case Builtin::BIexp2f:
3020 case Builtin::BIexp2l:
3021 case Builtin::BI__builtin_exp2:
3022 case Builtin::BI__builtin_exp2f:
3023 case Builtin::BI__builtin_exp2f16:
3024 case Builtin::BI__builtin_exp2l:
3025 case Builtin::BI__builtin_exp2f128:
3026 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3027 Intrinsic::exp2,
3028 Intrinsic::experimental_constrained_exp2));
3029 case Builtin::BI__builtin_exp10:
3030 case Builtin::BI__builtin_exp10f:
3031 case Builtin::BI__builtin_exp10f16:
3032 case Builtin::BI__builtin_exp10l:
3033 case Builtin::BI__builtin_exp10f128: {
3034 // TODO: strictfp support
3035 if (Builder.getIsFPConstrained())
3036 break;
3037 return RValue::get(
3038 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3039 }
3040 case Builtin::BIfabs:
3041 case Builtin::BIfabsf:
3042 case Builtin::BIfabsl:
3043 case Builtin::BI__builtin_fabs:
3044 case Builtin::BI__builtin_fabsf:
3045 case Builtin::BI__builtin_fabsf16:
3046 case Builtin::BI__builtin_fabsl:
3047 case Builtin::BI__builtin_fabsf128:
3048 return RValue::get(
3049 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3050
3051 case Builtin::BIfloor:
3052 case Builtin::BIfloorf:
3053 case Builtin::BIfloorl:
3054 case Builtin::BI__builtin_floor:
3055 case Builtin::BI__builtin_floorf:
3056 case Builtin::BI__builtin_floorf16:
3057 case Builtin::BI__builtin_floorl:
3058 case Builtin::BI__builtin_floorf128:
3059 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3060 Intrinsic::floor,
3061 Intrinsic::experimental_constrained_floor));
3062
3063 case Builtin::BIfma:
3064 case Builtin::BIfmaf:
3065 case Builtin::BIfmal:
3066 case Builtin::BI__builtin_fma:
3067 case Builtin::BI__builtin_fmaf:
3068 case Builtin::BI__builtin_fmaf16:
3069 case Builtin::BI__builtin_fmal:
3070 case Builtin::BI__builtin_fmaf128:
3071 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3072 Intrinsic::fma,
3073 Intrinsic::experimental_constrained_fma));
3074
3075 case Builtin::BIfmax:
3076 case Builtin::BIfmaxf:
3077 case Builtin::BIfmaxl:
3078 case Builtin::BI__builtin_fmax:
3079 case Builtin::BI__builtin_fmaxf:
3080 case Builtin::BI__builtin_fmaxf16:
3081 case Builtin::BI__builtin_fmaxl:
3082 case Builtin::BI__builtin_fmaxf128:
3083 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3084 Intrinsic::maxnum,
3085 Intrinsic::experimental_constrained_maxnum));
3086
3087 case Builtin::BIfmin:
3088 case Builtin::BIfminf:
3089 case Builtin::BIfminl:
3090 case Builtin::BI__builtin_fmin:
3091 case Builtin::BI__builtin_fminf:
3092 case Builtin::BI__builtin_fminf16:
3093 case Builtin::BI__builtin_fminl:
3094 case Builtin::BI__builtin_fminf128:
3095 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3096 Intrinsic::minnum,
3097 Intrinsic::experimental_constrained_minnum));
3098
3099 case Builtin::BIfmaximum_num:
3100 case Builtin::BIfmaximum_numf:
3101 case Builtin::BIfmaximum_numl:
3102 case Builtin::BI__builtin_fmaximum_num:
3103 case Builtin::BI__builtin_fmaximum_numf:
3104 case Builtin::BI__builtin_fmaximum_numf16:
3105 case Builtin::BI__builtin_fmaximum_numl:
3106 case Builtin::BI__builtin_fmaximum_numf128:
3107 return RValue::get(
3108 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3109
3110 case Builtin::BIfminimum_num:
3111 case Builtin::BIfminimum_numf:
3112 case Builtin::BIfminimum_numl:
3113 case Builtin::BI__builtin_fminimum_num:
3114 case Builtin::BI__builtin_fminimum_numf:
3115 case Builtin::BI__builtin_fminimum_numf16:
3116 case Builtin::BI__builtin_fminimum_numl:
3117 case Builtin::BI__builtin_fminimum_numf128:
3118 return RValue::get(
3119 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3120
3121 // fmod() is a special-case. It maps to the frem instruction rather than an
3122 // LLVM intrinsic.
3123 case Builtin::BIfmod:
3124 case Builtin::BIfmodf:
3125 case Builtin::BIfmodl:
3126 case Builtin::BI__builtin_fmod:
3127 case Builtin::BI__builtin_fmodf:
3128 case Builtin::BI__builtin_fmodf16:
3129 case Builtin::BI__builtin_fmodl:
3130 case Builtin::BI__builtin_fmodf128:
3131 case Builtin::BI__builtin_elementwise_fmod: {
3132 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3133 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3134 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3135 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3136 }
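// Illustrative example: 'fmod(x, y)' on doubles is emitted as
//   %fmod = frem double %x, %y
// rather than as a call to an 'llvm.*' intrinsic or a libcall.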
3137
3138 case Builtin::BIlog:
3139 case Builtin::BIlogf:
3140 case Builtin::BIlogl:
3141 case Builtin::BI__builtin_log:
3142 case Builtin::BI__builtin_logf:
3143 case Builtin::BI__builtin_logf16:
3144 case Builtin::BI__builtin_logl:
3145 case Builtin::BI__builtin_logf128:
3146 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3147 Intrinsic::log,
3148 Intrinsic::experimental_constrained_log));
3149
3150 case Builtin::BIlog10:
3151 case Builtin::BIlog10f:
3152 case Builtin::BIlog10l:
3153 case Builtin::BI__builtin_log10:
3154 case Builtin::BI__builtin_log10f:
3155 case Builtin::BI__builtin_log10f16:
3156 case Builtin::BI__builtin_log10l:
3157 case Builtin::BI__builtin_log10f128:
3158 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3159 Intrinsic::log10,
3160 Intrinsic::experimental_constrained_log10));
3161
3162 case Builtin::BIlog2:
3163 case Builtin::BIlog2f:
3164 case Builtin::BIlog2l:
3165 case Builtin::BI__builtin_log2:
3166 case Builtin::BI__builtin_log2f:
3167 case Builtin::BI__builtin_log2f16:
3168 case Builtin::BI__builtin_log2l:
3169 case Builtin::BI__builtin_log2f128:
3170 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3171 Intrinsic::log2,
3172 Intrinsic::experimental_constrained_log2));
3173
3174 case Builtin::BInearbyint:
3175 case Builtin::BInearbyintf:
3176 case Builtin::BInearbyintl:
3177 case Builtin::BI__builtin_nearbyint:
3178 case Builtin::BI__builtin_nearbyintf:
3179 case Builtin::BI__builtin_nearbyintl:
3180 case Builtin::BI__builtin_nearbyintf128:
3181 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3182 Intrinsic::nearbyint,
3183 Intrinsic::experimental_constrained_nearbyint));
3184
3185 case Builtin::BIpow:
3186 case Builtin::BIpowf:
3187 case Builtin::BIpowl:
3188 case Builtin::BI__builtin_pow:
3189 case Builtin::BI__builtin_powf:
3190 case Builtin::BI__builtin_powf16:
3191 case Builtin::BI__builtin_powl:
3192 case Builtin::BI__builtin_powf128:
3193 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3194 Intrinsic::pow,
3195 Intrinsic::experimental_constrained_pow));
3196
3197 case Builtin::BIrint:
3198 case Builtin::BIrintf:
3199 case Builtin::BIrintl:
3200 case Builtin::BI__builtin_rint:
3201 case Builtin::BI__builtin_rintf:
3202 case Builtin::BI__builtin_rintf16:
3203 case Builtin::BI__builtin_rintl:
3204 case Builtin::BI__builtin_rintf128:
3205 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3206 Intrinsic::rint,
3207 Intrinsic::experimental_constrained_rint));
3208
3209 case Builtin::BIround:
3210 case Builtin::BIroundf:
3211 case Builtin::BIroundl:
3212 case Builtin::BI__builtin_round:
3213 case Builtin::BI__builtin_roundf:
3214 case Builtin::BI__builtin_roundf16:
3215 case Builtin::BI__builtin_roundl:
3216 case Builtin::BI__builtin_roundf128:
3217 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3218 Intrinsic::round,
3219 Intrinsic::experimental_constrained_round));
3220
3221 case Builtin::BIroundeven:
3222 case Builtin::BIroundevenf:
3223 case Builtin::BIroundevenl:
3224 case Builtin::BI__builtin_roundeven:
3225 case Builtin::BI__builtin_roundevenf:
3226 case Builtin::BI__builtin_roundevenf16:
3227 case Builtin::BI__builtin_roundevenl:
3228 case Builtin::BI__builtin_roundevenf128:
3229 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3230 Intrinsic::roundeven,
3231 Intrinsic::experimental_constrained_roundeven));
3232
3233 case Builtin::BIsin:
3234 case Builtin::BIsinf:
3235 case Builtin::BIsinl:
3236 case Builtin::BI__builtin_sin:
3237 case Builtin::BI__builtin_sinf:
3238 case Builtin::BI__builtin_sinf16:
3239 case Builtin::BI__builtin_sinl:
3240 case Builtin::BI__builtin_sinf128:
3241 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3242 Intrinsic::sin,
3243 Intrinsic::experimental_constrained_sin));
3244
3245 case Builtin::BIsinh:
3246 case Builtin::BIsinhf:
3247 case Builtin::BIsinhl:
3248 case Builtin::BI__builtin_sinh:
3249 case Builtin::BI__builtin_sinhf:
3250 case Builtin::BI__builtin_sinhf16:
3251 case Builtin::BI__builtin_sinhl:
3252 case Builtin::BI__builtin_sinhf128:
3253 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3254 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3255
3256 case Builtin::BI__builtin_sincos:
3257 case Builtin::BI__builtin_sincosf:
3258 case Builtin::BI__builtin_sincosf16:
3259 case Builtin::BI__builtin_sincosl:
3260 case Builtin::BI__builtin_sincosf128:
3261 emitSincosBuiltin(*this, E, Intrinsic::sincos);
3262 return RValue::get(nullptr);
3263
3264 case Builtin::BIsqrt:
3265 case Builtin::BIsqrtf:
3266 case Builtin::BIsqrtl:
3267 case Builtin::BI__builtin_sqrt:
3268 case Builtin::BI__builtin_sqrtf:
3269 case Builtin::BI__builtin_sqrtf16:
3270 case Builtin::BI__builtin_sqrtl:
3271 case Builtin::BI__builtin_sqrtf128:
3272 case Builtin::BI__builtin_elementwise_sqrt: {
3273 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3274 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3275 SetSqrtFPAccuracy(Call);
3276 return RValue::get(Call);
3277 }
3278
3279 case Builtin::BItan:
3280 case Builtin::BItanf:
3281 case Builtin::BItanl:
3282 case Builtin::BI__builtin_tan:
3283 case Builtin::BI__builtin_tanf:
3284 case Builtin::BI__builtin_tanf16:
3285 case Builtin::BI__builtin_tanl:
3286 case Builtin::BI__builtin_tanf128:
3287 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3288 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3289
3290 case Builtin::BItanh:
3291 case Builtin::BItanhf:
3292 case Builtin::BItanhl:
3293 case Builtin::BI__builtin_tanh:
3294 case Builtin::BI__builtin_tanhf:
3295 case Builtin::BI__builtin_tanhf16:
3296 case Builtin::BI__builtin_tanhl:
3297 case Builtin::BI__builtin_tanhf128:
3298 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3299 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3300
3301 case Builtin::BItrunc:
3302 case Builtin::BItruncf:
3303 case Builtin::BItruncl:
3304 case Builtin::BI__builtin_trunc:
3305 case Builtin::BI__builtin_truncf:
3306 case Builtin::BI__builtin_truncf16:
3307 case Builtin::BI__builtin_truncl:
3308 case Builtin::BI__builtin_truncf128:
3309 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3310 Intrinsic::trunc,
3311 Intrinsic::experimental_constrained_trunc));
3312
3313 case Builtin::BIlround:
3314 case Builtin::BIlroundf:
3315 case Builtin::BIlroundl:
3316 case Builtin::BI__builtin_lround:
3317 case Builtin::BI__builtin_lroundf:
3318 case Builtin::BI__builtin_lroundl:
3319 case Builtin::BI__builtin_lroundf128:
3320 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3321 *this, E, Intrinsic::lround,
3322 Intrinsic::experimental_constrained_lround));
3323
3324 case Builtin::BIllround:
3325 case Builtin::BIllroundf:
3326 case Builtin::BIllroundl:
3327 case Builtin::BI__builtin_llround:
3328 case Builtin::BI__builtin_llroundf:
3329 case Builtin::BI__builtin_llroundl:
3330 case Builtin::BI__builtin_llroundf128:
3331 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3332 *this, E, Intrinsic::llround,
3333 Intrinsic::experimental_constrained_llround));
3334
3335 case Builtin::BIlrint:
3336 case Builtin::BIlrintf:
3337 case Builtin::BIlrintl:
3338 case Builtin::BI__builtin_lrint:
3339 case Builtin::BI__builtin_lrintf:
3340 case Builtin::BI__builtin_lrintl:
3341 case Builtin::BI__builtin_lrintf128:
3342 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3343 *this, E, Intrinsic::lrint,
3344 Intrinsic::experimental_constrained_lrint));
3345
3346 case Builtin::BIllrint:
3347 case Builtin::BIllrintf:
3348 case Builtin::BIllrintl:
3349 case Builtin::BI__builtin_llrint:
3350 case Builtin::BI__builtin_llrintf:
3351 case Builtin::BI__builtin_llrintl:
3352 case Builtin::BI__builtin_llrintf128:
3353 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3354 *this, E, Intrinsic::llrint,
3355 Intrinsic::experimental_constrained_llrint));
3356 case Builtin::BI__builtin_ldexp:
3357 case Builtin::BI__builtin_ldexpf:
3358 case Builtin::BI__builtin_ldexpl:
3359 case Builtin::BI__builtin_ldexpf16:
3360 case Builtin::BI__builtin_ldexpf128: {
3361 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3362 *this, E, Intrinsic::ldexp,
3363 Intrinsic::experimental_constrained_ldexp));
3364 }
3365 default:
3366 break;
3367 }
3368 }
3369
3370 // Check NonnullAttribute/NullabilityArg and Alignment.
3371 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3372 unsigned ParmNum) {
3373 Value *Val = A.emitRawPointer(*this);
3374 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3375 ParmNum);
3376
3377 if (SanOpts.has(SanitizerKind::Alignment)) {
3378 SanitizerSet SkippedChecks;
3379 SkippedChecks.set(SanitizerKind::All);
3380 SkippedChecks.clear(SanitizerKind::Alignment);
3381 SourceLocation Loc = Arg->getExprLoc();
3382 // Strip an implicit cast.
3383 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3384 if (CE->getCastKind() == CK_BitCast)
3385 Arg = CE->getSubExpr();
3386 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3387 SkippedChecks);
3388 }
3389 };
3390
3391 switch (BuiltinIDIfNoAsmLabel) {
3392 default: break;
3393 case Builtin::BI__builtin___CFStringMakeConstantString:
3394 case Builtin::BI__builtin___NSStringMakeConstantString:
3395 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3396 case Builtin::BI__builtin_stdarg_start:
3397 case Builtin::BI__builtin_va_start:
3398 case Builtin::BI__va_start:
3399 case Builtin::BI__builtin_va_end:
3400 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3401 ? EmitScalarExpr(E->getArg(0))
3402 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3403 BuiltinID != Builtin::BI__builtin_va_end);
3404 return RValue::get(nullptr);
3405 case Builtin::BI__builtin_va_copy: {
3406 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3407 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3408 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3409 {DstPtr, SrcPtr});
3410 return RValue::get(nullptr);
3411 }
3412 case Builtin::BIabs:
3413 case Builtin::BIlabs:
3414 case Builtin::BIllabs:
3415 case Builtin::BI__builtin_abs:
3416 case Builtin::BI__builtin_labs:
3417 case Builtin::BI__builtin_llabs: {
3418 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3419
3420 Value *Result;
3421 switch (getLangOpts().getSignedOverflowBehavior()) {
3422 case LangOptions::SOB_Defined:
3423 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3424 break;
3425 case LangOptions::SOB_Undefined:
3426 if (!SanitizeOverflow) {
3427 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3428 break;
3429 }
3430 [[fallthrough]];
3431 case LangOptions::SOB_Trapping:
3432 // TODO: Somehow handle the corner case when the address of abs is taken.
3433 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3434 break;
3435 }
3436 return RValue::get(Result);
3437 }
3438 case Builtin::BI__builtin_complex: {
3439 Value *Real = EmitScalarExpr(E->getArg(0));
3440 Value *Imag = EmitScalarExpr(E->getArg(1));
3441 return RValue::getComplex({Real, Imag});
3442 }
3443 case Builtin::BI__builtin_conj:
3444 case Builtin::BI__builtin_conjf:
3445 case Builtin::BI__builtin_conjl:
3446 case Builtin::BIconj:
3447 case Builtin::BIconjf:
3448 case Builtin::BIconjl: {
3449 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3450 Value *Real = ComplexVal.first;
3451 Value *Imag = ComplexVal.second;
3452 Imag = Builder.CreateFNeg(Imag, "neg");
3453 return RValue::getComplex(std::make_pair(Real, Imag));
3454 }
3455 case Builtin::BI__builtin_creal:
3456 case Builtin::BI__builtin_crealf:
3457 case Builtin::BI__builtin_creall:
3458 case Builtin::BIcreal:
3459 case Builtin::BIcrealf:
3460 case Builtin::BIcreall: {
3461 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3462 return RValue::get(ComplexVal.first);
3463 }
3464
3465 case Builtin::BI__builtin_preserve_access_index: {
3466 // Only enable the preserved access index region when debug info
3467 // is available, as debug info is needed to preserve the user-level
3468 // access pattern.
3469 if (!getDebugInfo()) {
3470 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3471 return RValue::get(EmitScalarExpr(E->getArg(0)));
3472 }
3473
3474 // Nested builtin_preserve_access_index() not supported
3475 if (IsInPreservedAIRegion) {
3476 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3477 return RValue::get(EmitScalarExpr(E->getArg(0)));
3478 }
3479
3480 IsInPreservedAIRegion = true;
3481 Value *Res = EmitScalarExpr(E->getArg(0));
3482 IsInPreservedAIRegion = false;
3483 return RValue::get(Res);
3484 }
3485
3486 case Builtin::BI__builtin_cimag:
3487 case Builtin::BI__builtin_cimagf:
3488 case Builtin::BI__builtin_cimagl:
3489 case Builtin::BIcimag:
3490 case Builtin::BIcimagf:
3491 case Builtin::BIcimagl: {
3492 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3493 return RValue::get(ComplexVal.second);
3494 }
3495
3496 case Builtin::BI__builtin_clrsb:
3497 case Builtin::BI__builtin_clrsbl:
3498 case Builtin::BI__builtin_clrsbll: {
3499 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3500 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3501
3502 llvm::Type *ArgType = ArgValue->getType();
3503 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3504
3505 llvm::Type *ResultType = ConvertType(E->getType());
3506 Value *Zero = llvm::Constant::getNullValue(ArgType);
3507 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3508 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3509 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3510 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3511 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3512 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3513 "cast");
3514 return RValue::get(Result);
3515 }
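// Illustrative example: for a 32-bit int, '__builtin_clrsb(0)' and
// '__builtin_clrsb(-1)' both evaluate to 31, since ctlz is called with
// 'is_zero_poison' set to false and one is subtracted from the count.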
3516 case Builtin::BI__builtin_ctzs:
3517 case Builtin::BI__builtin_ctz:
3518 case Builtin::BI__builtin_ctzl:
3519 case Builtin::BI__builtin_ctzll:
3520 case Builtin::BI__builtin_ctzg: {
3521 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3522 E->getNumArgs() > 1;
3523
3524 Value *ArgValue =
3525 HasFallback ? EmitScalarExpr(E->getArg(0))
3526 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3527
3528 llvm::Type *ArgType = ArgValue->getType();
3529 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3530
3531 llvm::Type *ResultType = ConvertType(E->getType());
3532 Value *ZeroUndef =
3533 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3534 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3535 if (Result->getType() != ResultType)
3536 Result =
3537 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3538 if (!HasFallback)
3539 return RValue::get(Result);
3540
3541 Value *Zero = Constant::getNullValue(ArgType);
3542 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3543 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3544 Value *ResultOrFallback =
3545 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3546 return RValue::get(ResultOrFallback);
3547 }
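// Illustrative example (shown schematically): '__builtin_ctzg(x, 32)' emits
//   %c = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %r = select i1 (%x == 0), i32 32, i32 %c
// whereas without a fallback argument the zero case follows the target's
// is-zero-undef behaviour.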
3548 case Builtin::BI__builtin_clzs:
3549 case Builtin::BI__builtin_clz:
3550 case Builtin::BI__builtin_clzl:
3551 case Builtin::BI__builtin_clzll:
3552 case Builtin::BI__builtin_clzg: {
3553 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3554 E->getNumArgs() > 1;
3555
3556 Value *ArgValue =
3557 HasFallback ? EmitScalarExpr(E->getArg(0))
3558 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3559
3560 llvm::Type *ArgType = ArgValue->getType();
3561 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3562
3563 llvm::Type *ResultType = ConvertType(E->getType());
3564 Value *ZeroUndef =
3565 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3566 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3567 if (Result->getType() != ResultType)
3568 Result =
3569 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3570 if (!HasFallback)
3571 return RValue::get(Result);
3572
3573 Value *Zero = Constant::getNullValue(ArgType);
3574 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3575 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3576 Value *ResultOrFallback =
3577 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3578 return RValue::get(ResultOrFallback);
3579 }
3580 case Builtin::BI__builtin_ffs:
3581 case Builtin::BI__builtin_ffsl:
3582 case Builtin::BI__builtin_ffsll: {
3583 // ffs(x) -> x ? cttz(x) + 1 : 0
3584 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3585
3586 llvm::Type *ArgType = ArgValue->getType();
3587 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3588
3589 llvm::Type *ResultType = ConvertType(E->getType());
3590 Value *Tmp =
3591 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3592 llvm::ConstantInt::get(ArgType, 1));
3593 Value *Zero = llvm::Constant::getNullValue(ArgType);
3594 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3595 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3596 if (Result->getType() != ResultType)
3597 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3598 "cast");
3599 return RValue::get(Result);
3600 }
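// Illustrative example: 'ffs(8)' returns 4, i.e. cttz(8) + 1, and the select
// above returns 0 only when the argument itself is 0.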
3601 case Builtin::BI__builtin_parity:
3602 case Builtin::BI__builtin_parityl:
3603 case Builtin::BI__builtin_parityll: {
3604 // parity(x) -> ctpop(x) & 1
3605 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3606
3607 llvm::Type *ArgType = ArgValue->getType();
3608 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3609
3610 llvm::Type *ResultType = ConvertType(E->getType());
3611 Value *Tmp = Builder.CreateCall(F, ArgValue);
3612 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3613 if (Result->getType() != ResultType)
3614 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3615 "cast");
3616 return RValue::get(Result);
3617 }
3618 case Builtin::BI__lzcnt16:
3619 case Builtin::BI__lzcnt:
3620 case Builtin::BI__lzcnt64: {
3621 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3622
3623 llvm::Type *ArgType = ArgValue->getType();
3624 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3625
3626 llvm::Type *ResultType = ConvertType(E->getType());
3627 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3628 if (Result->getType() != ResultType)
3629 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3630 "cast");
3631 return RValue::get(Result);
3632 }
3633 case Builtin::BI__popcnt16:
3634 case Builtin::BI__popcnt:
3635 case Builtin::BI__popcnt64:
3636 case Builtin::BI__builtin_popcount:
3637 case Builtin::BI__builtin_popcountl:
3638 case Builtin::BI__builtin_popcountll:
3639 case Builtin::BI__builtin_popcountg: {
3640 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3641
3642 llvm::Type *ArgType = ArgValue->getType();
3643 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3644
3645 llvm::Type *ResultType = ConvertType(E->getType());
3646 Value *Result = Builder.CreateCall(F, ArgValue);
3647 if (Result->getType() != ResultType)
3648 Result =
3649 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3650 return RValue::get(Result);
3651 }
3652 case Builtin::BI__builtin_unpredictable: {
3653 // Always return the argument of __builtin_unpredictable. LLVM does not
3654 // handle this builtin. Metadata for this builtin should be added directly
3655 // to instructions such as branches or switches that use it.
3656 return RValue::get(EmitScalarExpr(E->getArg(0)));
3657 }
3658 case Builtin::BI__builtin_expect: {
3659 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3660 llvm::Type *ArgType = ArgValue->getType();
3661
3662 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3663 // Don't generate llvm.expect on -O0 as the backend won't use it for
3664 // anything.
3665 // Note, we still IRGen ExpectedValue because it could have side-effects.
3666 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3667 return RValue::get(ArgValue);
3668
3669 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3670 Value *Result =
3671 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3672 return RValue::get(Result);
3673 }
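// Illustrative example: at -O1 and above, 'if (__builtin_expect(x, 0))' emits
//   %expval = call i64 @llvm.expect.i64(i64 %x, i64 0)
// (for a 64-bit argument); later passes typically turn this into branch
// weights on the conditional branch.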
3674 case Builtin::BI__builtin_expect_with_probability: {
3675 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3676 llvm::Type *ArgType = ArgValue->getType();
3677
3678 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3679 llvm::APFloat Probability(0.0);
3680 const Expr *ProbArg = E->getArg(2);
3681 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3682 assert(EvalSucceed && "probability should be able to evaluate as float");
3683 (void)EvalSucceed;
3684 bool LoseInfo = false;
3685 Probability.convert(llvm::APFloat::IEEEdouble(),
3686 llvm::RoundingMode::Dynamic, &LoseInfo);
3687 llvm::Type *Ty = ConvertType(ProbArg->getType());
3688 Constant *Confidence = ConstantFP::get(Ty, Probability);
3689 // Don't generate llvm.expect.with.probability on -O0 as the backend
3690 // won't use it for anything.
3691 // Note, we still IRGen ExpectedValue because it could have side-effects.
3692 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3693 return RValue::get(ArgValue);
3694
3695 Function *FnExpect =
3696 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3697 Value *Result = Builder.CreateCall(
3698 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3699 return RValue::get(Result);
3700 }
3701 case Builtin::BI__builtin_assume_aligned: {
3702 const Expr *Ptr = E->getArg(0);
3703 Value *PtrValue = EmitScalarExpr(Ptr);
3704 Value *OffsetValue =
3705 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3706
3707 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3708 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3709 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3710 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3711 llvm::Value::MaximumAlignment);
3712
3713 emitAlignmentAssumption(PtrValue, Ptr,
3714 /*The expr loc is sufficient.*/ SourceLocation(),
3715 AlignmentCI, OffsetValue);
3716 return RValue::get(PtrValue);
3717 }
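// Illustrative example: 'p = __builtin_assume_aligned(p, 64)' typically emits
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 64) ]
// and returns the original pointer value unchanged.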
3718 case Builtin::BI__assume:
3719 case Builtin::BI__builtin_assume: {
3720 if (E->getArg(0)->HasSideEffects(getContext()))
3721 return RValue::get(nullptr);
3722
3723 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3724 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3725 Builder.CreateCall(FnAssume, ArgValue);
3726 return RValue::get(nullptr);
3727 }
3728 case Builtin::BI__builtin_assume_separate_storage: {
3729 const Expr *Arg0 = E->getArg(0);
3730 const Expr *Arg1 = E->getArg(1);
3731
3732 Value *Value0 = EmitScalarExpr(Arg0);
3733 Value *Value1 = EmitScalarExpr(Arg1);
3734
3735 Value *Values[] = {Value0, Value1};
3736 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3737 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3738 return RValue::get(nullptr);
3739 }
3740 case Builtin::BI__builtin_allow_runtime_check: {
3741 StringRef Kind =
3742 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3743 LLVMContext &Ctx = CGM.getLLVMContext();
3744 llvm::Value *Allow = Builder.CreateCall(
3745 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3746 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3747 return RValue::get(Allow);
3748 }
3749 case Builtin::BI__arithmetic_fence: {
3750 // Create the builtin call if FastMath is selected, and the target
3751 // supports the builtin, otherwise just return the argument.
3752 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3753 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3754 bool isArithmeticFenceEnabled =
3755 FMF.allowReassoc() &&
3756 getContext().getTargetInfo().checkArithmeticFenceSupported();
3757 QualType ArgType = E->getArg(0)->getType();
3758 if (ArgType->isComplexType()) {
3759 if (isArithmeticFenceEnabled) {
3760 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3761 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3762 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3763 ConvertType(ElementType));
3764 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3765 ConvertType(ElementType));
3766 return RValue::getComplex(std::make_pair(Real, Imag));
3767 }
3768 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3769 Value *Real = ComplexVal.first;
3770 Value *Imag = ComplexVal.second;
3771 return RValue::getComplex(std::make_pair(Real, Imag));
3772 }
3773 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3774 if (isArithmeticFenceEnabled)
3775 return RValue::get(
3776 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3777 return RValue::get(ArgValue);
3778 }
3779 case Builtin::BI__builtin_bswap16:
3780 case Builtin::BI__builtin_bswap32:
3781 case Builtin::BI__builtin_bswap64:
3782 case Builtin::BI_byteswap_ushort:
3783 case Builtin::BI_byteswap_ulong:
3784 case Builtin::BI_byteswap_uint64: {
3785 return RValue::get(
3786 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3787 }
3788 case Builtin::BI__builtin_bitreverse8:
3789 case Builtin::BI__builtin_bitreverse16:
3790 case Builtin::BI__builtin_bitreverse32:
3791 case Builtin::BI__builtin_bitreverse64: {
3792 return RValue::get(
3793 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3794 }
3795 case Builtin::BI__builtin_rotateleft8:
3796 case Builtin::BI__builtin_rotateleft16:
3797 case Builtin::BI__builtin_rotateleft32:
3798 case Builtin::BI__builtin_rotateleft64:
3799 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3800 case Builtin::BI_rotl16:
3801 case Builtin::BI_rotl:
3802 case Builtin::BI_lrotl:
3803 case Builtin::BI_rotl64:
3804 return emitRotate(E, false);
3805
3806 case Builtin::BI__builtin_rotateright8:
3807 case Builtin::BI__builtin_rotateright16:
3808 case Builtin::BI__builtin_rotateright32:
3809 case Builtin::BI__builtin_rotateright64:
3810 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3811 case Builtin::BI_rotr16:
3812 case Builtin::BI_rotr:
3813 case Builtin::BI_lrotr:
3814 case Builtin::BI_rotr64:
3815 return emitRotate(E, true);
3816
3817 case Builtin::BI__builtin_constant_p: {
3818 llvm::Type *ResultType = ConvertType(E->getType());
3819
3820 const Expr *Arg = E->getArg(0);
3821 QualType ArgType = Arg->getType();
3822 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3823 // and likely a mistake.
3824 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3825 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3826 // Per the GCC documentation, only numeric constants are recognized after
3827 // inlining.
3828 return RValue::get(ConstantInt::get(ResultType, 0));
3829
3830 if (Arg->HasSideEffects(getContext()))
3831 // The argument is unevaluated, so be conservative if it might have
3832 // side-effects.
3833 return RValue::get(ConstantInt::get(ResultType, 0));
3834
3835 Value *ArgValue = EmitScalarExpr(Arg);
3836 if (ArgType->isObjCObjectPointerType()) {
3837 // Convert Objective-C objects to id because we cannot distinguish between
3838 // LLVM types for Obj-C classes as they are opaque.
3839 ArgType = CGM.getContext().getObjCIdType();
3840 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3841 }
3842 Function *F =
3843 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3844 Value *Result = Builder.CreateCall(F, ArgValue);
3845 if (Result->getType() != ResultType)
3846 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3847 return RValue::get(Result);
3848 }
3849 case Builtin::BI__builtin_dynamic_object_size:
3850 case Builtin::BI__builtin_object_size: {
3851 unsigned Type =
3852 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3853 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3854
3855 // We pass this builtin onto the optimizer so that it can figure out the
3856 // object size in more complex cases.
3857 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3858 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3859 /*EmittedE=*/nullptr, IsDynamic));
3860 }
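// Illustrative example: '__builtin_object_size(p, 0)' lowers to a call to
// 'llvm.objectsize' with flags derived from the type argument; the optimizer
// later folds it to a constant, or to -1/0 when the size is unknown.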
3861 case Builtin::BI__builtin_counted_by_ref: {
3862 // Default to returning '(void *) 0'.
3863 llvm::Value *Result = llvm::ConstantPointerNull::get(
3864 llvm::PointerType::getUnqual(getLLVMContext()));
3865
3866 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3867
3868 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3869 UO && UO->getOpcode() == UO_AddrOf) {
3870 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3871
3872 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3873 Arg = ASE->getBase()->IgnoreParenImpCasts();
3874 }
3875
3876 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3877 if (auto *CATy =
3878 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3879 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3880 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3881 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3882 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3883 else
3884 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3885 }
3886 }
3887
3888 return RValue::get(Result);
3889 }
3890 case Builtin::BI__builtin_prefetch: {
3891 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3892 // FIXME: Technically these constants should be of type 'int', yes?
3893 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3894 llvm::ConstantInt::get(Int32Ty, 0);
3895 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3896 llvm::ConstantInt::get(Int32Ty, 3);
3897 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3898 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3899 Builder.CreateCall(F, {Address, RW, Locality, Data});
3900 return RValue::get(nullptr);
3901 }
3902 case Builtin::BI__builtin_readcyclecounter: {
3903 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3904 return RValue::get(Builder.CreateCall(F));
3905 }
3906 case Builtin::BI__builtin_readsteadycounter: {
3907 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3908 return RValue::get(Builder.CreateCall(F));
3909 }
3910 case Builtin::BI__builtin___clear_cache: {
3911 Value *Begin = EmitScalarExpr(E->getArg(0));
3912 Value *End = EmitScalarExpr(E->getArg(1));
3913 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3914 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3915 }
3916 case Builtin::BI__builtin_trap:
3917 EmitTrapCall(Intrinsic::trap);
3918 return RValue::get(nullptr);
3919 case Builtin::BI__builtin_verbose_trap: {
3920 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3921 if (getDebugInfo()) {
3922 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3923 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3924 *E->getArg(1)->tryEvaluateString(getContext()));
3925 }
3926 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3927 // Currently no attempt is made to prevent traps from being merged.
3928 EmitTrapCall(Intrinsic::trap);
3929 return RValue::get(nullptr);
3930 }
3931 case Builtin::BI__debugbreak:
3932 EmitTrapCall(Intrinsic::debugtrap);
3933 return RValue::get(nullptr);
3934 case Builtin::BI__builtin_unreachable: {
3935 EmitUnreachable(E->getExprLoc());
3936
3937 // We do need to preserve an insertion point.
3938 EmitBlock(createBasicBlock("unreachable.cont"));
3939
3940 return RValue::get(nullptr);
3941 }
3942
3943 case Builtin::BI__builtin_powi:
3944 case Builtin::BI__builtin_powif:
3945 case Builtin::BI__builtin_powil: {
3946 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3947 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3948
3949 if (Builder.getIsFPConstrained()) {
3950 // FIXME: llvm.powi has 2 mangling types,
3951 // llvm.experimental.constrained.powi has one.
3952 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3953 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3954 Src0->getType());
3955 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3956 }
3957
3958 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3959 { Src0->getType(), Src1->getType() });
3960 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3961 }
3962 case Builtin::BI__builtin_frexpl: {
3963 // Linux PPC will not be adding additional PPCDoubleDouble support.
3964 // WIP to switch the default to IEEE long double. Will emit a libcall for
3965 // frexpl instead of legalizing this type in the backend.
3966 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3967 break;
3968 [[fallthrough]];
3969 }
3970 case Builtin::BI__builtin_frexp:
3971 case Builtin::BI__builtin_frexpf:
3972 case Builtin::BI__builtin_frexpf128:
3973 case Builtin::BI__builtin_frexpf16:
3974 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3975 case Builtin::BI__builtin_isgreater:
3976 case Builtin::BI__builtin_isgreaterequal:
3977 case Builtin::BI__builtin_isless:
3978 case Builtin::BI__builtin_islessequal:
3979 case Builtin::BI__builtin_islessgreater:
3980 case Builtin::BI__builtin_isunordered: {
3981 // Ordered comparisons: we know the arguments to these are matching scalar
3982 // floating point values.
3983 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3984 Value *LHS = EmitScalarExpr(E->getArg(0));
3985 Value *RHS = EmitScalarExpr(E->getArg(1));
3986
3987 switch (BuiltinID) {
3988 default: llvm_unreachable("Unknown ordered comparison");
3989 case Builtin::BI__builtin_isgreater:
3990 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3991 break;
3992 case Builtin::BI__builtin_isgreaterequal:
3993 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3994 break;
3995 case Builtin::BI__builtin_isless:
3996 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3997 break;
3998 case Builtin::BI__builtin_islessequal:
3999 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
4000 break;
4001 case Builtin::BI__builtin_islessgreater:
4002 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
4003 break;
4004 case Builtin::BI__builtin_isunordered:
4005 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
4006 break;
4007 }
4008 // ZExt bool to int type.
4009 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
4010 }
4011
4012 case Builtin::BI__builtin_isnan: {
4013 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4014 Value *V = EmitScalarExpr(E->getArg(0));
4015 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4016 return RValue::get(Result);
4017 return RValue::get(
4018 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
4019 ConvertType(E->getType())));
4020 }
4021
4022 case Builtin::BI__builtin_issignaling: {
4023 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4024 Value *V = EmitScalarExpr(E->getArg(0));
4025 return RValue::get(
4026 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
4027 ConvertType(E->getType())));
4028 }
4029
4030 case Builtin::BI__builtin_isinf: {
4031 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4032 Value *V = EmitScalarExpr(E->getArg(0));
4033 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4034 return RValue::get(Result);
4035 return RValue::get(
4036 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4037 ConvertType(E->getType())));
4038 }
4039
4040 case Builtin::BIfinite:
4041 case Builtin::BI__finite:
4042 case Builtin::BIfinitef:
4043 case Builtin::BI__finitef:
4044 case Builtin::BIfinitel:
4045 case Builtin::BI__finitel:
4046 case Builtin::BI__builtin_isfinite: {
4047 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4048 Value *V = EmitScalarExpr(E->getArg(0));
4049 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4050 return RValue::get(Result);
4051 return RValue::get(
4052 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4053 ConvertType(E->getType())));
4054 }
4055
4056 case Builtin::BI__builtin_isnormal: {
4057 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4058 Value *V = EmitScalarExpr(E->getArg(0));
4059 return RValue::get(
4060 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4061 ConvertType(E->getType())));
4062 }
4063
4064 case Builtin::BI__builtin_issubnormal: {
4065 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4066 Value *V = EmitScalarExpr(E->getArg(0));
4067 return RValue::get(
4068 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4069 ConvertType(E->getType())));
4070 }
4071
4072 case Builtin::BI__builtin_iszero: {
4073 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4074 Value *V = EmitScalarExpr(E->getArg(0));
4075 return RValue::get(
4076 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4077 ConvertType(E->getType())));
4078 }
4079
4080 case Builtin::BI__builtin_isfpclass: {
4081 Expr::EvalResult Result;
4082 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4083 break;
4084 uint64_t Test = Result.Val.getInt().getLimitedValue();
4085 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4086 Value *V = EmitScalarExpr(E->getArg(0));
4087 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4088 ConvertType(E->getType())));
4089 }
4090
4091 case Builtin::BI__builtin_nondeterministic_value: {
4092 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4093
4094 Value *Result = PoisonValue::get(Ty);
4095 Result = Builder.CreateFreeze(Result);
4096
4097 return RValue::get(Result);
4098 }
4099
4100 case Builtin::BI__builtin_elementwise_abs: {
4101 Value *Result;
4102 QualType QT = E->getArg(0)->getType();
4103
4104 if (auto *VecTy = QT->getAs<VectorType>())
4105 QT = VecTy->getElementType();
4106 if (QT->isIntegerType())
4107 Result = Builder.CreateBinaryIntrinsic(
4108 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4109 Builder.getFalse(), nullptr, "elt.abs");
4110 else
4111 Result = emitBuiltinWithOneOverloadedType<1>(
4112 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4113
4114 return RValue::get(Result);
4115 }
4116 case Builtin::BI__builtin_elementwise_acos:
4117 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4118 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4119 case Builtin::BI__builtin_elementwise_asin:
4120 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4121 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4122 case Builtin::BI__builtin_elementwise_atan:
4123 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4124 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4125 case Builtin::BI__builtin_elementwise_atan2:
4126 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4127 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4128 case Builtin::BI__builtin_elementwise_ceil:
4129 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4130 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4131 case Builtin::BI__builtin_elementwise_exp:
4132 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4133 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4134 case Builtin::BI__builtin_elementwise_exp2:
4135 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4136 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4137 case Builtin::BI__builtin_elementwise_log:
4138 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4139 *this, E, llvm::Intrinsic::log, "elt.log"));
4140 case Builtin::BI__builtin_elementwise_log2:
4141 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4142 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4143 case Builtin::BI__builtin_elementwise_log10:
4144 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4145 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4146 case Builtin::BI__builtin_elementwise_pow: {
4147 return RValue::get(
4148 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4149 }
4150 case Builtin::BI__builtin_elementwise_bitreverse:
4151 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4152 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4153 case Builtin::BI__builtin_elementwise_cos:
4154 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4155 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4156 case Builtin::BI__builtin_elementwise_cosh:
4157 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4158 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4159 case Builtin::BI__builtin_elementwise_floor:
4160 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4161 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4162 case Builtin::BI__builtin_elementwise_popcount:
4163 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4164 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4165 case Builtin::BI__builtin_elementwise_roundeven:
4166 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4167 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4168 case Builtin::BI__builtin_elementwise_round:
4169 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4170 *this, E, llvm::Intrinsic::round, "elt.round"));
4171 case Builtin::BI__builtin_elementwise_rint:
4172 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4173 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4174 case Builtin::BI__builtin_elementwise_nearbyint:
4175 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4176 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4177 case Builtin::BI__builtin_elementwise_sin:
4178 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4179 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4180 case Builtin::BI__builtin_elementwise_sinh:
4181 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4182 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4183 case Builtin::BI__builtin_elementwise_tan:
4184 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4185 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4186 case Builtin::BI__builtin_elementwise_tanh:
4187 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4188 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4189 case Builtin::BI__builtin_elementwise_trunc:
4190 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4191 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4192 case Builtin::BI__builtin_elementwise_canonicalize:
4193 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4194 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4195 case Builtin::BI__builtin_elementwise_copysign:
4196 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4197 *this, E, llvm::Intrinsic::copysign));
4198 case Builtin::BI__builtin_elementwise_fma:
4199 return RValue::get(
4200 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4201 case Builtin::BI__builtin_elementwise_add_sat:
4202 case Builtin::BI__builtin_elementwise_sub_sat: {
4203 Value *Op0 = EmitScalarExpr(E->getArg(0));
4204 Value *Op1 = EmitScalarExpr(E->getArg(1));
4205 Value *Result;
4206 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4207 QualType Ty = E->getArg(0)->getType();
4208 if (auto *VecTy = Ty->getAs<VectorType>())
4209 Ty = VecTy->getElementType();
4210 bool IsSigned = Ty->isSignedIntegerType();
4211 unsigned Opc;
4212 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4213 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4214 else
4215 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4216 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4217 return RValue::get(Result);
4218 }
4219
4220 case Builtin::BI__builtin_elementwise_max: {
4221 Value *Op0 = EmitScalarExpr(E->getArg(0));
4222 Value *Op1 = EmitScalarExpr(E->getArg(1));
4223 Value *Result;
4224 if (Op0->getType()->isIntOrIntVectorTy()) {
4225 QualType Ty = E->getArg(0)->getType();
4226 if (auto *VecTy = Ty->getAs<VectorType>())
4227 Ty = VecTy->getElementType();
4228 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4229 ? llvm::Intrinsic::smax
4230 : llvm::Intrinsic::umax,
4231 Op0, Op1, nullptr, "elt.max");
4232 } else
4233 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4234 return RValue::get(Result);
4235 }
4236 case Builtin::BI__builtin_elementwise_min: {
4237 Value *Op0 = EmitScalarExpr(E->getArg(0));
4238 Value *Op1 = EmitScalarExpr(E->getArg(1));
4239 Value *Result;
4240 if (Op0->getType()->isIntOrIntVectorTy()) {
4241 QualType Ty = E->getArg(0)->getType();
4242 if (auto *VecTy = Ty->getAs<VectorType>())
4243 Ty = VecTy->getElementType();
4244 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4245 ? llvm::Intrinsic::smin
4246 : llvm::Intrinsic::umin,
4247 Op0, Op1, nullptr, "elt.min");
4248 } else
4249 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4250 return RValue::get(Result);
4251 }
4252
4253 case Builtin::BI__builtin_elementwise_maximum: {
4254 Value *Op0 = EmitScalarExpr(E->getArg(0));
4255 Value *Op1 = EmitScalarExpr(E->getArg(1));
4256 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4257 Op1, nullptr, "elt.maximum");
4258 return RValue::get(Result);
4259 }
4260
4261 case Builtin::BI__builtin_elementwise_minimum: {
4262 Value *Op0 = EmitScalarExpr(E->getArg(0));
4263 Value *Op1 = EmitScalarExpr(E->getArg(1));
4264 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4265 Op1, nullptr, "elt.minimum");
4266 return RValue::get(Result);
4267 }
4268
4269 case Builtin::BI__builtin_reduce_max: {
4270 auto GetIntrinsicID = [this](QualType QT) {
4271 if (auto *VecTy = QT->getAs<VectorType>())
4272 QT = VecTy->getElementType();
4273 else if (QT->isSizelessVectorType())
4274 QT = QT->getSizelessVectorEltType(CGM.getContext());
4275
4276 if (QT->isSignedIntegerType())
4277 return llvm::Intrinsic::vector_reduce_smax;
4278 if (QT->isUnsignedIntegerType())
4279 return llvm::Intrinsic::vector_reduce_umax;
4280 assert(QT->isFloatingType() && "must have a float here");
4281 return llvm::Intrinsic::vector_reduce_fmax;
4282 };
4283 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4284 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4285 }
4286
4287 case Builtin::BI__builtin_reduce_min: {
4288 auto GetIntrinsicID = [this](QualType QT) {
4289 if (auto *VecTy = QT->getAs<VectorType>())
4290 QT = VecTy->getElementType();
4291 else if (QT->isSizelessVectorType())
4292 QT = QT->getSizelessVectorEltType(CGM.getContext());
4293
4294 if (QT->isSignedIntegerType())
4295 return llvm::Intrinsic::vector_reduce_smin;
4296 if (QT->isUnsignedIntegerType())
4297 return llvm::Intrinsic::vector_reduce_umin;
4298 assert(QT->isFloatingType() && "must have a float here");
4299 return llvm::Intrinsic::vector_reduce_fmin;
4300 };
4301
4302 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4303 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4304 }
4305
4306 case Builtin::BI__builtin_reduce_add:
4307 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4308 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4309 case Builtin::BI__builtin_reduce_mul:
4310 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4311 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4312 case Builtin::BI__builtin_reduce_xor:
4313 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4314 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4315 case Builtin::BI__builtin_reduce_or:
4316 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4317 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4318 case Builtin::BI__builtin_reduce_and:
4319 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4320 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4321 case Builtin::BI__builtin_reduce_maximum:
4322 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4323 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4324 case Builtin::BI__builtin_reduce_minimum:
4325 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4326 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4327
4328 case Builtin::BI__builtin_matrix_transpose: {
4329 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4330 Value *MatValue = EmitScalarExpr(E->getArg(0));
4331 MatrixBuilder MB(Builder);
4332 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4333 MatrixTy->getNumColumns());
4334 return RValue::get(Result);
4335 }
4336
4337 case Builtin::BI__builtin_matrix_column_major_load: {
4338 MatrixBuilder MB(Builder);
4339 // Emit everything that isn't dependent on the first parameter type
4340 Value *Stride = EmitScalarExpr(E->getArg(3));
4341 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4342 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4343 assert(PtrTy && "arg0 must be of pointer type");
4344 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4345
4346 Address Src = EmitPointerWithAlignment(E->getArg(0));
4347 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4348 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4349 0);
4350 Value *Result = MB.CreateColumnMajorLoad(
4351 Src.getElementType(), Src.emitRawPointer(*this),
4352 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4353 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4354 return RValue::get(Result);
4355 }
4356
4357 case Builtin::BI__builtin_matrix_column_major_store: {
4358 MatrixBuilder MB(Builder);
4359 Value *Matrix = EmitScalarExpr(E->getArg(0));
4360 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4361 Value *Stride = EmitScalarExpr(E->getArg(2));
4362
4363 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4364 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4365 assert(PtrTy && "arg1 must be of pointer type");
4366 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4367
4368 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4369 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4370 0);
4371 Value *Result = MB.CreateColumnMajorStore(
4372 Matrix, Dst.emitRawPointer(*this),
4373 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4374 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4375 return RValue::get(Result);
4376 }
4377
4378 case Builtin::BI__builtin_isinf_sign: {
4379 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4380 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4381 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4382 Value *Arg = EmitScalarExpr(E->getArg(0));
4383 Value *AbsArg = EmitFAbs(*this, Arg);
4384 Value *IsInf = Builder.CreateFCmpOEQ(
4385 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4386 Value *IsNeg = EmitSignBit(*this, Arg);
4387
4388 llvm::Type *IntTy = ConvertType(E->getType());
4389 Value *Zero = Constant::getNullValue(IntTy);
4390 Value *One = ConstantInt::get(IntTy, 1);
4391 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4392 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4393 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4394 return RValue::get(Result);
4395 }
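// Illustrative example: '__builtin_isinf_sign(-INFINITY)' yields -1,
// '__builtin_isinf_sign(INFINITY)' yields 1, and any finite or NaN input
// yields 0, matching the select chain above.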
4396
4397 case Builtin::BI__builtin_flt_rounds: {
4398 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4399
4400 llvm::Type *ResultType = ConvertType(E->getType());
4401 Value *Result = Builder.CreateCall(F);
4402 if (Result->getType() != ResultType)
4403 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4404 "cast");
4405 return RValue::get(Result);
4406 }
4407
4408 case Builtin::BI__builtin_set_flt_rounds: {
4409 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4410
4411 Value *V = EmitScalarExpr(E->getArg(0));
4412 Builder.CreateCall(F, V);
4413 return RValue::get(nullptr);
4414 }
4415
4416 case Builtin::BI__builtin_fpclassify: {
4417 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4418 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4419 Value *V = EmitScalarExpr(E->getArg(5));
4420 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4421
4422 // Create Result
4423 BasicBlock *Begin = Builder.GetInsertBlock();
4424 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4425 Builder.SetInsertPoint(End);
4426 PHINode *Result =
4427 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4428 "fpclassify_result");
4429
4430 // if (V==0) return FP_ZERO
4431 Builder.SetInsertPoint(Begin);
4432 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4433 "iszero");
4434 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4435 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4436 Builder.CreateCondBr(IsZero, End, NotZero);
4437 Result->addIncoming(ZeroLiteral, Begin);
4438
4439 // if (V != V) return FP_NAN
4440 Builder.SetInsertPoint(NotZero);
4441 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4442 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4443 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4444 Builder.CreateCondBr(IsNan, End, NotNan);
4445 Result->addIncoming(NanLiteral, NotZero);
4446
4447 // if (fabs(V) == infinity) return FP_INFINITY
4448 Builder.SetInsertPoint(NotNan);
4449 Value *VAbs = EmitFAbs(*this, V);
4450 Value *IsInf =
4451 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4452 "isinf");
4453 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4454 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4455 Builder.CreateCondBr(IsInf, End, NotInf);
4456 Result->addIncoming(InfLiteral, NotNan);
4457
4458 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4459 Builder.SetInsertPoint(NotInf);
4460 APFloat Smallest = APFloat::getSmallestNormalized(
4461 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4462 Value *IsNormal =
4463 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4464 "isnormal");
4465 Value *NormalResult =
4466 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4467 EmitScalarExpr(E->getArg(3)));
4468 Builder.CreateBr(End);
4469 Result->addIncoming(NormalResult, NotInf);
4470
4471 // return Result
4472 Builder.SetInsertPoint(End);
4473 return RValue::get(Result);
4474 }
4475
4476 // An alloca will always return a pointer to the alloca (stack) address
4477 // space. This address space need not be the same as the AST / Language
4478 // default (e.g. in C / C++ auto vars are in the generic address space). At
4479 // the AST level this is handled within CreateTempAlloca et al., but for the
4480 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4481 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
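// For example, on AMDGPU an alloca typically lives in address space 5
// (private) while the default pointer address space is 0, so the raw alloca
// result has to be cast before it can be returned as the expected 'void *'.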
4482 case Builtin::BIalloca:
4483 case Builtin::BI_alloca:
4484 case Builtin::BI__builtin_alloca_uninitialized:
4485 case Builtin::BI__builtin_alloca: {
4486 Value *Size = EmitScalarExpr(E->getArg(0));
4487 const TargetInfo &TI = getContext().getTargetInfo();
4488 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
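// (__BIGGEST_ALIGNMENT__ is target-defined; on common x86-64 targets it is
// 16, hence the query of the target info below.)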
4489 const Align SuitableAlignmentInBytes =
4490 CGM.getContext()
4491 .toCharUnitsFromBits(TI.getSuggestedAlignment())
4492 .getAsAlign();
4493 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4494 AI->setAlignment(SuitableAlignmentInBytes);
4495 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4496 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4497 LangAS AAS = getASTAllocaAddressSpace();
4498 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4499 if (AAS != EAS) {
4500 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4501 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4502 EAS, Ty));
4503 }
4504 return RValue::get(AI);
4505 }
4506
4507 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4508 case Builtin::BI__builtin_alloca_with_align: {
4509 Value *Size = EmitScalarExpr(E->getArg(0));
4510 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4511 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4512 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4513 const Align AlignmentInBytes =
4514 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4515 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4516 AI->setAlignment(AlignmentInBytes);
4517 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4518 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4519 LangAS AAS = getASTAllocaAddressSpace();
4520 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4521 if (AAS != EAS) {
4522 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4523 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4524 EAS, Ty));
4525 }
4526 return RValue::get(AI);
4527 }
4528
4529 case Builtin::BIbzero:
4530 case Builtin::BI__builtin_bzero: {
4531 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4532 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4533 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4534 E->getArg(0)->getExprLoc(), FD, 0);
4535 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4536 return RValue::get(nullptr);
4537 }
4538
4539 case Builtin::BIbcopy:
4540 case Builtin::BI__builtin_bcopy: {
4541 Address Src = EmitPointerWithAlignment(E->getArg(0));
4542 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4543 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4544 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4545 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4546 0);
4547 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4548 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4549 0);
4550 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4551 return RValue::get(nullptr);
4552 }
4553
4554 case Builtin::BImemcpy:
4555 case Builtin::BI__builtin_memcpy:
4556 case Builtin::BImempcpy:
4557 case Builtin::BI__builtin_mempcpy: {
4558 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4559 Address Src = EmitPointerWithAlignment(E->getArg(1));
4560 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4561 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4562 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4563 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4564 if (BuiltinID == Builtin::BImempcpy ||
4565 BuiltinID == Builtin::BI__builtin_mempcpy)
4566 return RValue::get(Builder.CreateInBoundsGEP(
4567 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4568 else
4569 return RValue::get(Dest, *this);
4570 }
4571
4572 case Builtin::BI__builtin_memcpy_inline: {
4573 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4574 Address Src = EmitPointerWithAlignment(E->getArg(1));
4575 uint64_t Size =
4576 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4577 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4578 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4579 Builder.CreateMemCpyInline(Dest, Src, Size);
4580 return RValue::get(nullptr);
4581 }
4582
4583 case Builtin::BI__builtin_char_memchr:
4584 BuiltinID = Builtin::BI__builtin_memchr;
4585 break;
4586
4587 case Builtin::BI__builtin___memcpy_chk: {
4588 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4589 Expr::EvalResult SizeResult, DstSizeResult;
4590 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4591 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4592 break;
4593 llvm::APSInt Size = SizeResult.Val.getInt();
4594 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4595 if (Size.ugt(DstSize))
4596 break;
4597 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4598 Address Src = EmitPointerWithAlignment(E->getArg(1));
4599 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4600 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4601 return RValue::get(Dest, *this);
4602 }
4603
4604 case Builtin::BI__builtin_objc_memmove_collectable: {
4605 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4606 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4607 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4608 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4609 DestAddr, SrcAddr, SizeVal);
4610 return RValue::get(DestAddr, *this);
4611 }
4612
4613 case Builtin::BI__builtin___memmove_chk: {
4614 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4615 Expr::EvalResult SizeResult, DstSizeResult;
4616 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4617 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4618 break;
4619 llvm::APSInt Size = SizeResult.Val.getInt();
4620 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4621 if (Size.ugt(DstSize))
4622 break;
4623 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4624 Address Src = EmitPointerWithAlignment(E->getArg(1));
4625 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4626 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4627 return RValue::get(Dest, *this);
4628 }
4629
4630 case Builtin::BImemmove:
4631 case Builtin::BI__builtin_memmove: {
4632 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4633 Address Src = EmitPointerWithAlignment(E->getArg(1));
4634 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4635 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4636 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4637 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4638 return RValue::get(Dest, *this);
4639 }
4640 case Builtin::BImemset:
4641 case Builtin::BI__builtin_memset: {
4642 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4643 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4644 Builder.getInt8Ty());
4645 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4646 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4647 E->getArg(0)->getExprLoc(), FD, 0);
4648 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4649 return RValue::get(Dest, *this);
4650 }
4651 case Builtin::BI__builtin_memset_inline: {
4652 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4653 Value *ByteVal =
4654 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4655 uint64_t Size =
4656 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4657 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4658 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4659 0);
4660 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4661 return RValue::get(nullptr);
4662 }
4663 case Builtin::BI__builtin___memset_chk: {
4664 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4665 Expr::EvalResult SizeResult, DstSizeResult;
4666 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4667 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4668 break;
4669 llvm::APSInt Size = SizeResult.Val.getInt();
4670 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4671 if (Size.ugt(DstSize))
4672 break;
4673 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4674 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4675 Builder.getInt8Ty());
4676 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4677 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4678 return RValue::get(Dest, *this);
4679 }
4680 case Builtin::BI__builtin_wmemchr: {
4681 // The MSVC runtime library does not provide a definition of wmemchr, so we
4682 // need an inline implementation.
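// The IR built below corresponds roughly to this C sketch (names are
// illustrative only):
//   for (; size != 0; --size, ++str)
//     if (*str == chr)
//       return (wchar_t *)str;
//   return nullptr;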
4683 if (!getTarget().getTriple().isOSMSVCRT())
4684 break;
4685
4686 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4687 Value *Str = EmitScalarExpr(E->getArg(0));
4688 Value *Chr = EmitScalarExpr(E->getArg(1));
4689 Value *Size = EmitScalarExpr(E->getArg(2));
4690
4691 BasicBlock *Entry = Builder.GetInsertBlock();
4692 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4693 BasicBlock *Next = createBasicBlock("wmemchr.next");
4694 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4695 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4696 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4697
4698 EmitBlock(CmpEq);
4699 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4700 StrPhi->addIncoming(Str, Entry);
4701 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4702 SizePhi->addIncoming(Size, Entry);
4703 CharUnits WCharAlign =
4704 getContext().getTypeAlignInChars(getContext().WCharTy);
4705 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4706 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4707 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4708 Builder.CreateCondBr(StrEqChr, Exit, Next);
4709
4710 EmitBlock(Next);
4711 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4712 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4713 Value *NextSizeEq0 =
4714 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4715 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4716 StrPhi->addIncoming(NextStr, Next);
4717 SizePhi->addIncoming(NextSize, Next);
4718
4719 EmitBlock(Exit);
4720 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4721 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4722 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4723 Ret->addIncoming(FoundChr, CmpEq);
4724 return RValue::get(Ret);
4725 }
4726 case Builtin::BI__builtin_wmemcmp: {
4727 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4728 // need an inline implementation.
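// The IR built below corresponds roughly to this C sketch (names are
// illustrative); note the elements are compared as unsigned values, since
// wchar_t is an unsigned 16-bit type on MSVC targets:
//   for (; size != 0; --size, ++dst, ++src) {
//     if (*dst > *src) return 1;
//     if (*dst < *src) return -1;
//   }
//   return 0;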
4729 if (!getTarget().getTriple().isOSMSVCRT())
4730 break;
4731
4732 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4733
4734 Value *Dst = EmitScalarExpr(E->getArg(0));
4735 Value *Src = EmitScalarExpr(E->getArg(1));
4736 Value *Size = EmitScalarExpr(E->getArg(2));
4737
4738 BasicBlock *Entry = Builder.GetInsertBlock();
4739 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4740 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4741 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4742 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4743 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4744 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4745
4746 EmitBlock(CmpGT);
4747 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4748 DstPhi->addIncoming(Dst, Entry);
4749 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4750 SrcPhi->addIncoming(Src, Entry);
4751 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4752 SizePhi->addIncoming(Size, Entry);
4753 CharUnits WCharAlign =
4754 getContext().getTypeAlignInChars(getContext().WCharTy);
4755 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4756 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4757 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4758 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4759
4760 EmitBlock(CmpLT);
4761 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4762 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4763
4764 EmitBlock(Next);
4765 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4766 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4767 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4768 Value *NextSizeEq0 =
4769 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4770 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4771 DstPhi->addIncoming(NextDst, Next);
4772 SrcPhi->addIncoming(NextSrc, Next);
4773 SizePhi->addIncoming(NextSize, Next);
4774
4775 EmitBlock(Exit);
4776 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4777 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4778 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4779 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4780 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4781 return RValue::get(Ret);
4782 }
4783 case Builtin::BI__builtin_dwarf_cfa: {
4784 // The offset in bytes from the first argument to the CFA.
4785 //
4786 // Why on earth is this in the frontend? Is there any reason at
4787 // all that the backend can't reasonably determine this while
4788 // lowering llvm.eh.dwarf.cfa()?
4789 //
4790 // TODO: If there's a satisfactory reason, add a target hook for
4791 // this instead of hard-coding 0, which is correct for most targets.
4792 int32_t Offset = 0;
4793
4794 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4795 return RValue::get(Builder.CreateCall(F,
4796 llvm::ConstantInt::get(Int32Ty, Offset)));
4797 }
4798 case Builtin::BI__builtin_return_address: {
4799 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4800 getContext().UnsignedIntTy);
4801 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4802 return RValue::get(Builder.CreateCall(F, Depth));
4803 }
4804 case Builtin::BI_ReturnAddress: {
4805 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4806 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4807 }
4808 case Builtin::BI__builtin_frame_address: {
4809 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4810 getContext().UnsignedIntTy);
4811 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4812 return RValue::get(Builder.CreateCall(F, Depth));
4813 }
4814 case Builtin::BI__builtin_extract_return_addr: {
4815 Value *Address = EmitScalarExpr(E->getArg(0));
4816 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4817 return RValue::get(Result);
4818 }
4819 case Builtin::BI__builtin_frob_return_addr: {
4820 Value *Address = EmitScalarExpr(E->getArg(0));
4821 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4822 return RValue::get(Result);
4823 }
4824 case Builtin::BI__builtin_dwarf_sp_column: {
4825 llvm::IntegerType *Ty
4826 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4827 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4828 if (Column == -1) {
4829 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4830 return RValue::get(llvm::UndefValue::get(Ty));
4831 }
4832 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4833 }
4834 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4835 Value *Address = EmitScalarExpr(E->getArg(0));
4836 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4837 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4838 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4839 }
4840 case Builtin::BI__builtin_eh_return: {
4841 Value *Int = EmitScalarExpr(E->getArg(0));
4842 Value *Ptr = EmitScalarExpr(E->getArg(1));
4843
4844 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4845 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4846 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4847 Function *F =
4848 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4849 : Intrinsic::eh_return_i64);
4850 Builder.CreateCall(F, {Int, Ptr});
4851 Builder.CreateUnreachable();
4852
4853 // We do need to preserve an insertion point.
4854 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4855
4856 return RValue::get(nullptr);
4857 }
4858 case Builtin::BI__builtin_unwind_init: {
4859 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4860 Builder.CreateCall(F);
4861 return RValue::get(nullptr);
4862 }
4863 case Builtin::BI__builtin_extend_pointer: {
4864 // Extends a pointer to the size of an _Unwind_Word, which is
4865 // uint64_t on all platforms. Generally this gets poked into a
4866 // register and eventually used as an address, so if the
4867 // addressing registers are wider than pointers and the platform
4868 // doesn't implicitly ignore high-order bits when doing
4869 // addressing, we need to make sure we zext / sext based on
4870 // the platform's expectations.
4871 //
4872 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
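// For example, with 32-bit pointers and a 64-bit _Unwind_Word, the pointer
// value 0x80000000 becomes 0x0000000080000000 under zext but
// 0xFFFFFFFF80000000 under sext; which one is correct is a target property,
// queried below.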
4873
4874 // Cast the pointer to intptr_t.
4875 Value *Ptr = EmitScalarExpr(E->getArg(0));
4876 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4877
4878 // If that's 64 bits, we're done.
4879 if (IntPtrTy->getBitWidth() == 64)
4880 return RValue::get(Result);
4881
4882 // Otherwise, ask the codegen data what to do.
4883 if (getTargetHooks().extendPointerWithSExt())
4884 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4885 else
4886 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4887 }
4888 case Builtin::BI__builtin_setjmp: {
4889 // Buffer is a void**.
4890 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4891
4892 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4893 // On this target, the back end fills in the context buffer completely.
4894 // It doesn't really matter whether the frontend stores to the buffer
4895 // before calling setjmp; the back end is going to overwrite it anyway.
4896 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4897 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4898 }
4899
4900 // Store the frame pointer to the setjmp buffer.
4901 Value *FrameAddr = Builder.CreateCall(
4902 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4903 ConstantInt::get(Int32Ty, 0));
4904 Builder.CreateStore(FrameAddr, Buf);
4905
4906 // Store the stack pointer to the setjmp buffer.
4907 Value *StackAddr = Builder.CreateStackSave();
4908 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4909
4910 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4911 Builder.CreateStore(StackAddr, StackSaveSlot);
4912
4913 // Call LLVM's EH setjmp, which is lightweight.
4914 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4915 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4916 }
4917 case Builtin::BI__builtin_longjmp: {
4918 Value *Buf = EmitScalarExpr(E->getArg(0));
4919
4920 // Call LLVM's EH longjmp, which is lightweight.
4921 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4922
4923 // longjmp doesn't return; mark this as unreachable.
4924 Builder.CreateUnreachable();
4925
4926 // We do need to preserve an insertion point.
4927 EmitBlock(createBasicBlock("longjmp.cont"));
4928
4929 return RValue::get(nullptr);
4930 }
4931 case Builtin::BI__builtin_launder: {
4932 const Expr *Arg = E->getArg(0);
4933 QualType ArgTy = Arg->getType()->getPointeeType();
4934 Value *Ptr = EmitScalarExpr(Arg);
4935 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4936 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4937
4938 return RValue::get(Ptr);
4939 }
4940 case Builtin::BI__sync_fetch_and_add:
4941 case Builtin::BI__sync_fetch_and_sub:
4942 case Builtin::BI__sync_fetch_and_or:
4943 case Builtin::BI__sync_fetch_and_and:
4944 case Builtin::BI__sync_fetch_and_xor:
4945 case Builtin::BI__sync_fetch_and_nand:
4946 case Builtin::BI__sync_add_and_fetch:
4947 case Builtin::BI__sync_sub_and_fetch:
4948 case Builtin::BI__sync_and_and_fetch:
4949 case Builtin::BI__sync_or_and_fetch:
4950 case Builtin::BI__sync_xor_and_fetch:
4951 case Builtin::BI__sync_nand_and_fetch:
4952 case Builtin::BI__sync_val_compare_and_swap:
4953 case Builtin::BI__sync_bool_compare_and_swap:
4954 case Builtin::BI__sync_lock_test_and_set:
4955 case Builtin::BI__sync_lock_release:
4956 case Builtin::BI__sync_swap:
4957 llvm_unreachable("Shouldn't make it through sema");
4958 case Builtin::BI__sync_fetch_and_add_1:
4959 case Builtin::BI__sync_fetch_and_add_2:
4960 case Builtin::BI__sync_fetch_and_add_4:
4961 case Builtin::BI__sync_fetch_and_add_8:
4962 case Builtin::BI__sync_fetch_and_add_16:
4963 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4964 case Builtin::BI__sync_fetch_and_sub_1:
4965 case Builtin::BI__sync_fetch_and_sub_2:
4966 case Builtin::BI__sync_fetch_and_sub_4:
4967 case Builtin::BI__sync_fetch_and_sub_8:
4968 case Builtin::BI__sync_fetch_and_sub_16:
4969 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4970 case Builtin::BI__sync_fetch_and_or_1:
4971 case Builtin::BI__sync_fetch_and_or_2:
4972 case Builtin::BI__sync_fetch_and_or_4:
4973 case Builtin::BI__sync_fetch_and_or_8:
4974 case Builtin::BI__sync_fetch_and_or_16:
4975 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4976 case Builtin::BI__sync_fetch_and_and_1:
4977 case Builtin::BI__sync_fetch_and_and_2:
4978 case Builtin::BI__sync_fetch_and_and_4:
4979 case Builtin::BI__sync_fetch_and_and_8:
4980 case Builtin::BI__sync_fetch_and_and_16:
4981 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4982 case Builtin::BI__sync_fetch_and_xor_1:
4983 case Builtin::BI__sync_fetch_and_xor_2:
4984 case Builtin::BI__sync_fetch_and_xor_4:
4985 case Builtin::BI__sync_fetch_and_xor_8:
4986 case Builtin::BI__sync_fetch_and_xor_16:
4987 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4988 case Builtin::BI__sync_fetch_and_nand_1:
4989 case Builtin::BI__sync_fetch_and_nand_2:
4990 case Builtin::BI__sync_fetch_and_nand_4:
4991 case Builtin::BI__sync_fetch_and_nand_8:
4992 case Builtin::BI__sync_fetch_and_nand_16:
4993 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4994
4995 // Clang extensions: not overloaded yet.
4996 case Builtin::BI__sync_fetch_and_min:
4997 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4998 case Builtin::BI__sync_fetch_and_max:
4999 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
5000 case Builtin::BI__sync_fetch_and_umin:
5001 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
5002 case Builtin::BI__sync_fetch_and_umax:
5003 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
5004
5005 case Builtin::BI__sync_add_and_fetch_1:
5006 case Builtin::BI__sync_add_and_fetch_2:
5007 case Builtin::BI__sync_add_and_fetch_4:
5008 case Builtin::BI__sync_add_and_fetch_8:
5009 case Builtin::BI__sync_add_and_fetch_16:
5010 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
5011 llvm::Instruction::Add);
5012 case Builtin::BI__sync_sub_and_fetch_1:
5013 case Builtin::BI__sync_sub_and_fetch_2:
5014 case Builtin::BI__sync_sub_and_fetch_4:
5015 case Builtin::BI__sync_sub_and_fetch_8:
5016 case Builtin::BI__sync_sub_and_fetch_16:
5017 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
5018 llvm::Instruction::Sub);
5019 case Builtin::BI__sync_and_and_fetch_1:
5020 case Builtin::BI__sync_and_and_fetch_2:
5021 case Builtin::BI__sync_and_and_fetch_4:
5022 case Builtin::BI__sync_and_and_fetch_8:
5023 case Builtin::BI__sync_and_and_fetch_16:
5024 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
5025 llvm::Instruction::And);
5026 case Builtin::BI__sync_or_and_fetch_1:
5027 case Builtin::BI__sync_or_and_fetch_2:
5028 case Builtin::BI__sync_or_and_fetch_4:
5029 case Builtin::BI__sync_or_and_fetch_8:
5030 case Builtin::BI__sync_or_and_fetch_16:
5031 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5032 llvm::Instruction::Or);
5033 case Builtin::BI__sync_xor_and_fetch_1:
5034 case Builtin::BI__sync_xor_and_fetch_2:
5035 case Builtin::BI__sync_xor_and_fetch_4:
5036 case Builtin::BI__sync_xor_and_fetch_8:
5037 case Builtin::BI__sync_xor_and_fetch_16:
5038 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5039 llvm::Instruction::Xor);
5040 case Builtin::BI__sync_nand_and_fetch_1:
5041 case Builtin::BI__sync_nand_and_fetch_2:
5042 case Builtin::BI__sync_nand_and_fetch_4:
5043 case Builtin::BI__sync_nand_and_fetch_8:
5044 case Builtin::BI__sync_nand_and_fetch_16:
5045 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5046 llvm::Instruction::And, true);
5047
5048 case Builtin::BI__sync_val_compare_and_swap_1:
5049 case Builtin::BI__sync_val_compare_and_swap_2:
5050 case Builtin::BI__sync_val_compare_and_swap_4:
5051 case Builtin::BI__sync_val_compare_and_swap_8:
5052 case Builtin::BI__sync_val_compare_and_swap_16:
5053 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5054
5055 case Builtin::BI__sync_bool_compare_and_swap_1:
5056 case Builtin::BI__sync_bool_compare_and_swap_2:
5057 case Builtin::BI__sync_bool_compare_and_swap_4:
5058 case Builtin::BI__sync_bool_compare_and_swap_8:
5059 case Builtin::BI__sync_bool_compare_and_swap_16:
5060 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5061
5062 case Builtin::BI__sync_swap_1:
5063 case Builtin::BI__sync_swap_2:
5064 case Builtin::BI__sync_swap_4:
5065 case Builtin::BI__sync_swap_8:
5066 case Builtin::BI__sync_swap_16:
5067 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5068
5069 case Builtin::BI__sync_lock_test_and_set_1:
5070 case Builtin::BI__sync_lock_test_and_set_2:
5071 case Builtin::BI__sync_lock_test_and_set_4:
5072 case Builtin::BI__sync_lock_test_and_set_8:
5073 case Builtin::BI__sync_lock_test_and_set_16:
5074 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5075
5076 case Builtin::BI__sync_lock_release_1:
5077 case Builtin::BI__sync_lock_release_2:
5078 case Builtin::BI__sync_lock_release_4:
5079 case Builtin::BI__sync_lock_release_8:
5080 case Builtin::BI__sync_lock_release_16: {
5081 Address Ptr = CheckAtomicAlignment(*this, E);
5082 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5083
5084 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5085 getContext().getTypeSize(ElTy));
5086 llvm::StoreInst *Store =
5087 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5088 Store->setAtomic(llvm::AtomicOrdering::Release);
5089 return RValue::get(nullptr);
5090 }
5091
5092 case Builtin::BI__sync_synchronize: {
5093 // We assume this is supposed to correspond to a C++0x-style
5094 // sequentially-consistent fence (i.e. this is only usable for
5095 // synchronization, not device I/O or anything like that). This intrinsic
5096 // is really badly designed in the sense that in theory, there isn't
5097 // any way to safely use it... but in practice, it mostly works
5098 // to use it with non-atomic loads and stores to get acquire/release
5099 // semantics.
5100 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5101 return RValue::get(nullptr);
5102 }
5103
5104 case Builtin::BI__builtin_nontemporal_load:
5105 return RValue::get(EmitNontemporalLoad(*this, E));
5106 case Builtin::BI__builtin_nontemporal_store:
5107 return RValue::get(EmitNontemporalStore(*this, E));
5108 case Builtin::BI__c11_atomic_is_lock_free:
5109 case Builtin::BI__atomic_is_lock_free: {
5110 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5111 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5112 // _Atomic(T) is always properly-aligned.
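// A typical use looks like __atomic_is_lock_free(sizeof(T), &obj); when the
// answer has not already been folded to a constant, it lowers to a call to
// the runtime helper assembled below.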
5113 const char *LibCallName = "__atomic_is_lock_free";
5114 CallArgList Args;
5115 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5116 getContext().getSizeType());
5117 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5118 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5119 getContext().VoidPtrTy);
5120 else
5121 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5122 getContext().VoidPtrTy);
5123 const CGFunctionInfo &FuncInfo =
5124 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5125 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5126 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5127 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5128 ReturnValueSlot(), Args);
5129 }
5130
5131 case Builtin::BI__atomic_thread_fence:
5132 case Builtin::BI__atomic_signal_fence:
5133 case Builtin::BI__c11_atomic_thread_fence:
5134 case Builtin::BI__c11_atomic_signal_fence: {
5135 llvm::SyncScope::ID SSID;
5136 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5137 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5138 SSID = llvm::SyncScope::SingleThread;
5139 else
5140 SSID = llvm::SyncScope::System;
5141 Value *Order = EmitScalarExpr(E->getArg(0));
5142 if (isa<llvm::ConstantInt>(Order)) {
5143 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5144 switch (ord) {
5145 case 0: // memory_order_relaxed
5146 default: // invalid order
5147 break;
5148 case 1: // memory_order_consume
5149 case 2: // memory_order_acquire
5150 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5151 break;
5152 case 3: // memory_order_release
5153 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5154 break;
5155 case 4: // memory_order_acq_rel
5156 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5157 break;
5158 case 5: // memory_order_seq_cst
5159 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5160 break;
5161 }
5162 return RValue::get(nullptr);
5163 }
5164
5165 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5166 AcquireBB = createBasicBlock("acquire", CurFn);
5167 ReleaseBB = createBasicBlock("release", CurFn);
5168 AcqRelBB = createBasicBlock("acqrel", CurFn);
5169 SeqCstBB = createBasicBlock("seqcst", CurFn);
5170 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5171
5172 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5173 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5174
5175 Builder.SetInsertPoint(AcquireBB);
5176 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5177 Builder.CreateBr(ContBB);
5178 SI->addCase(Builder.getInt32(1), AcquireBB);
5179 SI->addCase(Builder.getInt32(2), AcquireBB);
5180
5181 Builder.SetInsertPoint(ReleaseBB);
5182 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5183 Builder.CreateBr(ContBB);
5184 SI->addCase(Builder.getInt32(3), ReleaseBB);
5185
5186 Builder.SetInsertPoint(AcqRelBB);
5187 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5188 Builder.CreateBr(ContBB);
5189 SI->addCase(Builder.getInt32(4), AcqRelBB);
5190
5191 Builder.SetInsertPoint(SeqCstBB);
5192 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5193 Builder.CreateBr(ContBB);
5194 SI->addCase(Builder.getInt32(5), SeqCstBB);
5195
5196 Builder.SetInsertPoint(ContBB);
5197 return RValue::get(nullptr);
5198 }
5199 case Builtin::BI__scoped_atomic_thread_fence: {
5200 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5201
5202 Value *Order = EmitScalarExpr(E->getArg(0));
5203 Value *Scope = EmitScalarExpr(E->getArg(1));
5204 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5205 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5206 if (Ord && Scp) {
5207 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5208 ? ScopeModel->map(Scp->getZExtValue())
5209 : ScopeModel->map(ScopeModel->getFallBackValue());
5210 switch (Ord->getZExtValue()) {
5211 case 0: // memory_order_relaxed
5212 default: // invalid order
5213 break;
5214 case 1: // memory_order_consume
5215 case 2: // memory_order_acquire
5216 Builder.CreateFence(
5217 llvm::AtomicOrdering::Acquire,
5218 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5219 llvm::AtomicOrdering::Acquire,
5220 getLLVMContext()));
5221 break;
5222 case 3: // memory_order_release
5223 Builder.CreateFence(
5224 llvm::AtomicOrdering::Release,
5225 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5226 llvm::AtomicOrdering::Release,
5227 getLLVMContext()));
5228 break;
5229 case 4: // memory_order_acq_rel
5230 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5231 getTargetHooks().getLLVMSyncScopeID(
5232 getLangOpts(), SS,
5233 llvm::AtomicOrdering::AcquireRelease,
5234 getLLVMContext()));
5235 break;
5236 case 5: // memory_order_seq_cst
5237 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5238 getTargetHooks().getLLVMSyncScopeID(
5239 getLangOpts(), SS,
5240 llvm::AtomicOrdering::SequentiallyConsistent,
5241 getLLVMContext()));
5242 break;
5243 }
5244 return RValue::get(nullptr);
5245 }
5246
5247 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5248
5249 llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
5250 OrderBBs;
5251 if (Ord) {
5252 switch (Ord->getZExtValue()) {
5253 case 0: // memory_order_relaxed
5254 default: // invalid order
5255 ContBB->eraseFromParent();
5256 return RValue::get(nullptr);
5257 case 1: // memory_order_consume
5258 case 2: // memory_order_acquire
5259 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5260 llvm::AtomicOrdering::Acquire);
5261 break;
5262 case 3: // memory_order_release
5263 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5264 llvm::AtomicOrdering::Release);
5265 break;
5266 case 4: // memory_order_acq_rel
5267 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5268 llvm::AtomicOrdering::AcquireRelease);
5269 break;
5270 case 5: // memory_order_seq_cst
5271 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5272 llvm::AtomicOrdering::SequentiallyConsistent);
5273 break;
5274 }
5275 } else {
5276 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5277 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5278 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5279 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5280
5281 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5282 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5283 SI->addCase(Builder.getInt32(1), AcquireBB);
5284 SI->addCase(Builder.getInt32(2), AcquireBB);
5285 SI->addCase(Builder.getInt32(3), ReleaseBB);
5286 SI->addCase(Builder.getInt32(4), AcqRelBB);
5287 SI->addCase(Builder.getInt32(5), SeqCstBB);
5288
5289 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5290 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5291 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5292 OrderBBs.emplace_back(SeqCstBB,
5293 llvm::AtomicOrdering::SequentiallyConsistent);
5294 }
5295
5296 for (auto &[OrderBB, Ordering] : OrderBBs) {
5297 Builder.SetInsertPoint(OrderBB);
5298 if (Scp) {
5299 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5300 ? ScopeModel->map(Scp->getZExtValue())
5301 : ScopeModel->map(ScopeModel->getFallBackValue());
5302 Builder.CreateFence(Ordering,
5303 getTargetHooks().getLLVMSyncScopeID(
5304 getLangOpts(), SS, Ordering, getLLVMContext()));
5305 Builder.CreateBr(ContBB);
5306 } else {
5307 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5308 for (unsigned Scp : ScopeModel->getRuntimeValues())
5309 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5310
5311 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5312 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5313 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5314 auto *B = BBs[Scp];
5315 SI->addCase(Builder.getInt32(Scp), B);
5316
5317 Builder.SetInsertPoint(B);
5318 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5319 getLangOpts(), ScopeModel->map(Scp),
5320 Ordering, getLLVMContext()));
5321 Builder.CreateBr(ContBB);
5322 }
5323 }
5324 }
5325
5326 Builder.SetInsertPoint(ContBB);
5327 return RValue::get(nullptr);
5328 }
5329
5330 case Builtin::BI__builtin_signbit:
5331 case Builtin::BI__builtin_signbitf:
5332 case Builtin::BI__builtin_signbitl: {
5333 return RValue::get(
5334 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5335 ConvertType(E->getType())));
5336 }
5337 case Builtin::BI__warn_memset_zero_len:
5338 return RValue::getIgnored();
5339 case Builtin::BI__annotation: {
5340 // Re-encode each wide string to UTF8 and make an MDString.
5341 SmallVector<Metadata *, 1> Strings;
5342 for (const Expr *Arg : E->arguments()) {
5343 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5344 assert(Str->getCharByteWidth() == 2);
5345 StringRef WideBytes = Str->getBytes();
5346 std::string StrUtf8;
5347 if (!convertUTF16ToUTF8String(
5348 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5349 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5350 continue;
5351 }
5352 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5353 }
5354
5355 // Build an MDTuple of MDStrings and emit the intrinsic call.
5356 llvm::Function *F =
5357 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5358 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5359 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5360 return RValue::getIgnored();
5361 }
5362 case Builtin::BI__builtin_annotation: {
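// Illustrative use: int y = __builtin_annotation(x, "my_tag"); the builtin
// returns its first argument unchanged and records the string through the
// llvm.annotation intrinsic emitted below.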
5363 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5364 llvm::Function *F =
5365 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5366 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5367
5368 // Get the annotation string, go through casts. Sema requires this to be a
5369 // non-wide string literal, potentially casted, so the cast<> is safe.
5370 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5371 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5372 return RValue::get(
5373 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5374 }
5375 case Builtin::BI__builtin_addcb:
5376 case Builtin::BI__builtin_addcs:
5377 case Builtin::BI__builtin_addc:
5378 case Builtin::BI__builtin_addcl:
5379 case Builtin::BI__builtin_addcll:
5380 case Builtin::BI__builtin_subcb:
5381 case Builtin::BI__builtin_subcs:
5382 case Builtin::BI__builtin_subc:
5383 case Builtin::BI__builtin_subcl:
5384 case Builtin::BI__builtin_subcll: {
5385
5386 // We translate all of these builtins from expressions of the form:
5387 // int x = ..., y = ..., carryin = ..., carryout, result;
5388 // result = __builtin_addc(x, y, carryin, &carryout);
5389 //
5390 // to LLVM IR of the form:
5391 //
5392 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5393 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5394 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5395 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5396 // i32 %carryin)
5397 // %result = extractvalue {i32, i1} %tmp2, 0
5398 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5399 // %tmp3 = or i1 %carry1, %carry2
5400 // %tmp4 = zext i1 %tmp3 to i32
5401 // store i32 %tmp4, i32* %carryout
5402
5403 // Scalarize our inputs.
5404 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5405 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5406 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5407 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5408
5409 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5410 llvm::Intrinsic::ID IntrinsicId;
5411 switch (BuiltinID) {
5412 default: llvm_unreachable("Unknown multiprecision builtin id.");
5413 case Builtin::BI__builtin_addcb:
5414 case Builtin::BI__builtin_addcs:
5415 case Builtin::BI__builtin_addc:
5416 case Builtin::BI__builtin_addcl:
5417 case Builtin::BI__builtin_addcll:
5418 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5419 break;
5420 case Builtin::BI__builtin_subcb:
5421 case Builtin::BI__builtin_subcs:
5422 case Builtin::BI__builtin_subc:
5423 case Builtin::BI__builtin_subcl:
5424 case Builtin::BI__builtin_subcll:
5425 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5426 break;
5427 }
5428
5429 // Construct our resulting LLVM IR expression.
5430 llvm::Value *Carry1;
5431 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5432 X, Y, Carry1);
5433 llvm::Value *Carry2;
5434 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5435 Sum1, Carryin, Carry2);
5436 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5437 X->getType());
5438 Builder.CreateStore(CarryOut, CarryOutPtr);
5439 return RValue::get(Sum2);
5440 }
5441
5442 case Builtin::BI__builtin_add_overflow:
5443 case Builtin::BI__builtin_sub_overflow:
5444 case Builtin::BI__builtin_mul_overflow: {
5445 const clang::Expr *LeftArg = E->getArg(0);
5446 const clang::Expr *RightArg = E->getArg(1);
5447 const clang::Expr *ResultArg = E->getArg(2);
5448
5449 clang::QualType ResultQTy =
5450 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5451
5452 WidthAndSignedness LeftInfo =
5453 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5454 WidthAndSignedness RightInfo =
5455 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5456 WidthAndSignedness ResultInfo =
5457 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5458
5459 // Handle mixed-sign multiplication as a special case, because adding
5460 // runtime or backend support for our generic irgen would be too expensive.
5461 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5462 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5463 RightInfo, ResultArg, ResultQTy,
5464 ResultInfo);
5465
5466 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5467 ResultInfo))
5468 return EmitCheckedUnsignedMultiplySignedResult(
5469 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5470 ResultInfo);
5471
5472 WidthAndSignedness EncompassingInfo =
5473 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5474
5475 llvm::Type *EncompassingLLVMTy =
5476 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5477
5478 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5479
5480 llvm::Intrinsic::ID IntrinsicId;
5481 switch (BuiltinID) {
5482 default:
5483 llvm_unreachable("Unknown overflow builtin id.");
5484 case Builtin::BI__builtin_add_overflow:
5485 IntrinsicId = EncompassingInfo.Signed
5486 ? llvm::Intrinsic::sadd_with_overflow
5487 : llvm::Intrinsic::uadd_with_overflow;
5488 break;
5489 case Builtin::BI__builtin_sub_overflow:
5490 IntrinsicId = EncompassingInfo.Signed
5491 ? llvm::Intrinsic::ssub_with_overflow
5492 : llvm::Intrinsic::usub_with_overflow;
5493 break;
5494 case Builtin::BI__builtin_mul_overflow:
5495 IntrinsicId = EncompassingInfo.Signed
5496 ? llvm::Intrinsic::smul_with_overflow
5497 : llvm::Intrinsic::umul_with_overflow;
5498 break;
5499 }
5500
5501 llvm::Value *Left = EmitScalarExpr(LeftArg);
5502 llvm::Value *Right = EmitScalarExpr(RightArg);
5503 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5504
5505 // Extend each operand to the encompassing type.
5506 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5507 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5508
5509 // Perform the operation on the extended values.
5510 llvm::Value *Overflow, *Result;
5511 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5512
5513 if (EncompassingInfo.Width > ResultInfo.Width) {
5514 // The encompassing type is wider than the result type, so we need to
5515 // truncate it.
5516 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5517
5518 // To see if the truncation caused an overflow, we will extend
5519 // the result and then compare it to the original result.
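// For example (sketch, assuming 32-bit int and 16-bit short): adding two
// ints into a short may compute 70000 in i32; truncating gives 4464, and
// re-extending yields 4464 != 70000, so the overflow flag is set even
// though the i32 addition itself did not overflow.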
5520 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5521 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5522 llvm::Value *TruncationOverflow =
5523 Builder.CreateICmpNE(Result, ResultTruncExt);
5524
5525 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5526 Result = ResultTrunc;
5527 }
5528
5529 // Finally, store the result using the pointer.
5530 bool isVolatile =
5531 ResultArg->getType()->getPointeeType().isVolatileQualified();
5532 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5533
5534 return RValue::get(Overflow);
5535 }
5536
5537 case Builtin::BI__builtin_uadd_overflow:
5538 case Builtin::BI__builtin_uaddl_overflow:
5539 case Builtin::BI__builtin_uaddll_overflow:
5540 case Builtin::BI__builtin_usub_overflow:
5541 case Builtin::BI__builtin_usubl_overflow:
5542 case Builtin::BI__builtin_usubll_overflow:
5543 case Builtin::BI__builtin_umul_overflow:
5544 case Builtin::BI__builtin_umull_overflow:
5545 case Builtin::BI__builtin_umulll_overflow:
5546 case Builtin::BI__builtin_sadd_overflow:
5547 case Builtin::BI__builtin_saddl_overflow:
5548 case Builtin::BI__builtin_saddll_overflow:
5549 case Builtin::BI__builtin_ssub_overflow:
5550 case Builtin::BI__builtin_ssubl_overflow:
5551 case Builtin::BI__builtin_ssubll_overflow:
5552 case Builtin::BI__builtin_smul_overflow:
5553 case Builtin::BI__builtin_smull_overflow:
5554 case Builtin::BI__builtin_smulll_overflow: {
5555
5556 // We translate all of these builtins directly to the relevant llvm IR node.
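// For example, __builtin_sadd_overflow(a, b, &r) with 32-bit ints maps to
// llvm.sadd.with.overflow.i32: the first element of the returned struct is
// stored to *r and the second (the overflow bit) becomes the call's result.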
5557
5558 // Scalarize our inputs.
5559 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5560 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5561 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5562
5563 // Decide which of the overflow intrinsics we are lowering to:
5564 llvm::Intrinsic::ID IntrinsicId;
5565 switch (BuiltinID) {
5566 default: llvm_unreachable("Unknown overflow builtin id.");
5567 case Builtin::BI__builtin_uadd_overflow:
5568 case Builtin::BI__builtin_uaddl_overflow:
5569 case Builtin::BI__builtin_uaddll_overflow:
5570 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5571 break;
5572 case Builtin::BI__builtin_usub_overflow:
5573 case Builtin::BI__builtin_usubl_overflow:
5574 case Builtin::BI__builtin_usubll_overflow:
5575 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5576 break;
5577 case Builtin::BI__builtin_umul_overflow:
5578 case Builtin::BI__builtin_umull_overflow:
5579 case Builtin::BI__builtin_umulll_overflow:
5580 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5581 break;
5582 case Builtin::BI__builtin_sadd_overflow:
5583 case Builtin::BI__builtin_saddl_overflow:
5584 case Builtin::BI__builtin_saddll_overflow:
5585 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5586 break;
5587 case Builtin::BI__builtin_ssub_overflow:
5588 case Builtin::BI__builtin_ssubl_overflow:
5589 case Builtin::BI__builtin_ssubll_overflow:
5590 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5591 break;
5592 case Builtin::BI__builtin_smul_overflow:
5593 case Builtin::BI__builtin_smull_overflow:
5594 case Builtin::BI__builtin_smulll_overflow:
5595 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5596 break;
5597 }
5598
5599
5600 llvm::Value *Carry;
5601 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5602 Builder.CreateStore(Sum, SumOutPtr);
5603
5604 return RValue::get(Carry);
5605 }
5606 case Builtin::BIaddressof:
5607 case Builtin::BI__addressof:
5608 case Builtin::BI__builtin_addressof:
5609 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5610 case Builtin::BI__builtin_function_start:
5611 return RValue::get(CGM.GetFunctionStart(
5612 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5613 case Builtin::BI__builtin_operator_new:
5614 return EmitBuiltinNewDeleteCall(
5615 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5616 case Builtin::BI__builtin_operator_delete:
5617 EmitBuiltinNewDeleteCall(
5618 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5619 return RValue::get(nullptr);
5620
5621 case Builtin::BI__builtin_is_aligned:
5622 return EmitBuiltinIsAligned(E);
5623 case Builtin::BI__builtin_align_up:
5624 return EmitBuiltinAlignTo(E, true);
5625 case Builtin::BI__builtin_align_down:
5626 return EmitBuiltinAlignTo(E, false);
5627
5628 case Builtin::BI__noop:
5629 // __noop always evaluates to an integer literal zero.
5630 return RValue::get(ConstantInt::get(IntTy, 0));
5631 case Builtin::BI__builtin_call_with_static_chain: {
5632 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5633 const Expr *Chain = E->getArg(1);
5634 return EmitCall(Call->getCallee()->getType(),
5635 EmitCallee(Call->getCallee()), Call, ReturnValue,
5636 EmitScalarExpr(Chain));
5637 }
5638 case Builtin::BI_InterlockedExchange8:
5639 case Builtin::BI_InterlockedExchange16:
5640 case Builtin::BI_InterlockedExchange:
5641 case Builtin::BI_InterlockedExchangePointer:
5642 return RValue::get(
5643 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5644 case Builtin::BI_InterlockedCompareExchangePointer:
5645 return RValue::get(
5646 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5647 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5648 return RValue::get(
5649 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5650 case Builtin::BI_InterlockedCompareExchange8:
5651 case Builtin::BI_InterlockedCompareExchange16:
5652 case Builtin::BI_InterlockedCompareExchange:
5653 case Builtin::BI_InterlockedCompareExchange64:
5654 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5655 case Builtin::BI_InterlockedIncrement16:
5656 case Builtin::BI_InterlockedIncrement:
5657 return RValue::get(
5658 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5659 case Builtin::BI_InterlockedDecrement16:
5660 case Builtin::BI_InterlockedDecrement:
5661 return RValue::get(
5662 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5663 case Builtin::BI_InterlockedAnd8:
5664 case Builtin::BI_InterlockedAnd16:
5665 case Builtin::BI_InterlockedAnd:
5666 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5667 case Builtin::BI_InterlockedExchangeAdd8:
5668 case Builtin::BI_InterlockedExchangeAdd16:
5669 case Builtin::BI_InterlockedExchangeAdd:
5670 return RValue::get(
5671 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5672 case Builtin::BI_InterlockedExchangeSub8:
5673 case Builtin::BI_InterlockedExchangeSub16:
5674 case Builtin::BI_InterlockedExchangeSub:
5675 return RValue::get(
5676 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5677 case Builtin::BI_InterlockedOr8:
5678 case Builtin::BI_InterlockedOr16:
5679 case Builtin::BI_InterlockedOr:
5680 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5681 case Builtin::BI_InterlockedXor8:
5682 case Builtin::BI_InterlockedXor16:
5683 case Builtin::BI_InterlockedXor:
5684 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5685
5686 case Builtin::BI_bittest64:
5687 case Builtin::BI_bittest:
5688 case Builtin::BI_bittestandcomplement64:
5689 case Builtin::BI_bittestandcomplement:
5690 case Builtin::BI_bittestandreset64:
5691 case Builtin::BI_bittestandreset:
5692 case Builtin::BI_bittestandset64:
5693 case Builtin::BI_bittestandset:
5694 case Builtin::BI_interlockedbittestandreset:
5695 case Builtin::BI_interlockedbittestandreset64:
5696 case Builtin::BI_interlockedbittestandset64:
5697 case Builtin::BI_interlockedbittestandset:
5698 case Builtin::BI_interlockedbittestandset_acq:
5699 case Builtin::BI_interlockedbittestandset_rel:
5700 case Builtin::BI_interlockedbittestandset_nf:
5701 case Builtin::BI_interlockedbittestandreset_acq:
5702 case Builtin::BI_interlockedbittestandreset_rel:
5703 case Builtin::BI_interlockedbittestandreset_nf:
5704 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5705
5706 // These builtins exist to emit regular volatile loads and stores not
5707 // affected by the -fms-volatile setting.
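// (Under -fms-volatile, i.e. MSVC's /volatile:ms model, ordinary volatile
// accesses may gain acquire/release ordering; these builtins always emit
// plain volatile loads and stores.)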
5708 case Builtin::BI__iso_volatile_load8:
5709 case Builtin::BI__iso_volatile_load16:
5710 case Builtin::BI__iso_volatile_load32:
5711 case Builtin::BI__iso_volatile_load64:
5712 return RValue::get(EmitISOVolatileLoad(*this, E));
5713 case Builtin::BI__iso_volatile_store8:
5714 case Builtin::BI__iso_volatile_store16:
5715 case Builtin::BI__iso_volatile_store32:
5716 case Builtin::BI__iso_volatile_store64:
5717 return RValue::get(EmitISOVolatileStore(*this, E));
5718
5719 case Builtin::BI__builtin_ptrauth_sign_constant:
5720 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5721
5722 case Builtin::BI__builtin_ptrauth_auth:
5723 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5724 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5725 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5726 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5727 case Builtin::BI__builtin_ptrauth_strip: {
5728 // Emit the arguments.
5729 SmallVector<llvm::Value *, 5> Args;
5730 for (auto argExpr : E->arguments())
5731 Args.push_back(EmitScalarExpr(argExpr));
5732
5733 // Cast the value to intptr_t, saving its original type.
5734 llvm::Type *OrigValueType = Args[0]->getType();
5735 if (OrigValueType->isPointerTy())
5736 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5737
5738 switch (BuiltinID) {
5739 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5740 if (Args[4]->getType()->isPointerTy())
5741 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5742 [[fallthrough]];
5743
5744 case Builtin::BI__builtin_ptrauth_auth:
5745 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5746 if (Args[2]->getType()->isPointerTy())
5747 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5748 break;
5749
5750 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5751 if (Args[1]->getType()->isPointerTy())
5752 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5753 break;
5754
5755 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5756 case Builtin::BI__builtin_ptrauth_strip:
5757 break;
5758 }
5759
5760 // Call the intrinsic.
5761 auto IntrinsicID = [&]() -> unsigned {
5762 switch (BuiltinID) {
5763 case Builtin::BI__builtin_ptrauth_auth:
5764 return llvm::Intrinsic::ptrauth_auth;
5765 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5766 return llvm::Intrinsic::ptrauth_resign;
5767 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5768 return llvm::Intrinsic::ptrauth_blend;
5769 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5770 return llvm::Intrinsic::ptrauth_sign_generic;
5771 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5772 return llvm::Intrinsic::ptrauth_sign;
5773 case Builtin::BI__builtin_ptrauth_strip:
5774 return llvm::Intrinsic::ptrauth_strip;
5775 }
5776 llvm_unreachable("bad ptrauth intrinsic");
5777 }();
5778 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5779 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5780
5781 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5782 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5783 OrigValueType->isPointerTy()) {
5784 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5785 }
5786 return RValue::get(Result);
5787 }
5788
5789 case Builtin::BI__exception_code:
5790 case Builtin::BI_exception_code:
5791 return RValue::get(EmitSEHExceptionCode());
5792 case Builtin::BI__exception_info:
5793 case Builtin::BI_exception_info:
5794 return RValue::get(EmitSEHExceptionInfo());
5795 case Builtin::BI__abnormal_termination:
5796 case Builtin::BI_abnormal_termination:
5797 return RValue::get(EmitSEHAbnormalTermination());
5798 case Builtin::BI_setjmpex:
5799 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5800 E->getArg(0)->getType()->isPointerType())
5801 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5802 break;
5803 case Builtin::BI_setjmp:
5804 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5805 E->getArg(0)->getType()->isPointerType()) {
5806 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5807 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5808 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5809 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5810 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5811 }
5812 break;
5813
5814 // C++ std:: builtins.
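// These standard library functions (std::move, std::forward, std::as_const,
// ...) are pure casts at the language level, so no call is emitted; the
// operand's address is returned directly.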
5815 case Builtin::BImove:
5816 case Builtin::BImove_if_noexcept:
5817 case Builtin::BIforward:
5818 case Builtin::BIforward_like:
5819 case Builtin::BIas_const:
5820 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5821 case Builtin::BI__GetExceptionInfo: {
5822 if (llvm::GlobalVariable *GV =
5823 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5824 return RValue::get(GV);
5825 break;
5826 }
5827
5828 case Builtin::BI__fastfail:
5829 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5830
5831 case Builtin::BI__builtin_coro_id:
5832 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5833 case Builtin::BI__builtin_coro_promise:
5834 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5835 case Builtin::BI__builtin_coro_resume:
5836 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5837 return RValue::get(nullptr);
5838 case Builtin::BI__builtin_coro_frame:
5839 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5840 case Builtin::BI__builtin_coro_noop:
5841 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5842 case Builtin::BI__builtin_coro_free:
5843 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5844 case Builtin::BI__builtin_coro_destroy:
5845 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5846 return RValue::get(nullptr);
5847 case Builtin::BI__builtin_coro_done:
5848 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5849 case Builtin::BI__builtin_coro_alloc:
5850 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5851 case Builtin::BI__builtin_coro_begin:
5852 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5853 case Builtin::BI__builtin_coro_end:
5854 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5855 case Builtin::BI__builtin_coro_suspend:
5856 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5857 case Builtin::BI__builtin_coro_size:
5858 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5859 case Builtin::BI__builtin_coro_align:
5860 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5861
5862 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
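// For example (illustrative): a two-argument read_pipe(p, ptr) call is emitted
// below as __read_pipe_2(p, ptr, packet_size, packet_align), and the
// four-argument reserved form as
// __read_pipe_4(p, reserve_id, index, ptr, packet_size, packet_align).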
5863 case Builtin::BIread_pipe:
5864 case Builtin::BIwrite_pipe: {
5865 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5866 *Arg1 = EmitScalarExpr(E->getArg(1));
5867 CGOpenCLRuntime OpenCLRT(CGM);
5868 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5869 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5870
5871 // Type of the generic packet parameter.
5872 unsigned GenericAS =
5873 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5874 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5875
5876 // Testing which overloaded version we should generate the call for.
5877 if (2U == E->getNumArgs()) {
5878 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5879 : "__write_pipe_2";
5880 // Creating a generic function type to be able to call with any builtin or
5881 // user defined type.
5882 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5883 llvm::FunctionType *FTy = llvm::FunctionType::get(
5884 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5885 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
5886 return RValue::get(
5887 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5888 {Arg0, ACast, PacketSize, PacketAlign}));
5889 } else {
5890 assert(4 == E->getNumArgs() &&
5891 "Illegal number of parameters to pipe function");
5892 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5893 : "__write_pipe_4";
5894
5895 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5896 Int32Ty, Int32Ty};
5897 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5898 *Arg3 = EmitScalarExpr(E->getArg(3));
5899 llvm::FunctionType *FTy = llvm::FunctionType::get(
5900 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5901 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
5902 // We know the third argument is an integer type, but we may need to cast
5903 // it to i32.
5904 if (Arg2->getType() != Int32Ty)
5905 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5906 return RValue::get(
5907 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5908 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
5909 }
5910 }
5911 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
5912 // functions
5913 case Builtin::BIreserve_read_pipe:
5914 case Builtin::BIreserve_write_pipe:
5915 case Builtin::BIwork_group_reserve_read_pipe:
5916 case Builtin::BIwork_group_reserve_write_pipe:
5917 case Builtin::BIsub_group_reserve_read_pipe:
5918 case Builtin::BIsub_group_reserve_write_pipe: {
5919 // Composing the mangled name for the function.
5920 const char *Name;
5921 if (BuiltinID == Builtin::BIreserve_read_pipe)
5922 Name = "__reserve_read_pipe";
5923 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5924 Name = "__reserve_write_pipe";
5925 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5926 Name = "__work_group_reserve_read_pipe";
5927 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5928 Name = "__work_group_reserve_write_pipe";
5929 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5930 Name = "__sub_group_reserve_read_pipe";
5931 else
5932 Name = "__sub_group_reserve_write_pipe";
5933
5934 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5935 *Arg1 = EmitScalarExpr(E->getArg(1));
5936 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5937 CGOpenCLRuntime OpenCLRT(CGM);
5938 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5939 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5940
5941 // Building the generic function prototype.
5942 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5943 llvm::FunctionType *FTy = llvm::FunctionType::get(
5944 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5945 // We know the second argument is an integer type, but we may need to cast
5946 // it to i32.
5947 if (Arg1->getType() != Int32Ty)
5948 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5949 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5950 {Arg0, Arg1, PacketSize, PacketAlign}));
5951 }
5952 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5953 // functions
5954 case Builtin::BIcommit_read_pipe:
5955 case Builtin::BIcommit_write_pipe:
5956 case Builtin::BIwork_group_commit_read_pipe:
5957 case Builtin::BIwork_group_commit_write_pipe:
5958 case Builtin::BIsub_group_commit_read_pipe:
5959 case Builtin::BIsub_group_commit_write_pipe: {
5960 const char *Name;
5961 if (BuiltinID == Builtin::BIcommit_read_pipe)
5962 Name = "__commit_read_pipe";
5963 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5964 Name = "__commit_write_pipe";
5965 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5966 Name = "__work_group_commit_read_pipe";
5967 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5968 Name = "__work_group_commit_write_pipe";
5969 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5970 Name = "__sub_group_commit_read_pipe";
5971 else
5972 Name = "__sub_group_commit_write_pipe";
5973
5974 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5975 *Arg1 = EmitScalarExpr(E->getArg(1));
5976 CGOpenCLRuntime OpenCLRT(CGM);
5977 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5978 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5979
5980 // Building the generic function prototype.
5981 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5982 llvm::FunctionType *FTy =
5983 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5984 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5985
5986 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5987 {Arg0, Arg1, PacketSize, PacketAlign}));
5988 }
5989 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5990 case Builtin::BIget_pipe_num_packets:
5991 case Builtin::BIget_pipe_max_packets: {
5992 const char *BaseName;
5993 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5994 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5995 BaseName = "__get_pipe_num_packets";
5996 else
5997 BaseName = "__get_pipe_max_packets";
5998 std::string Name = std::string(BaseName) +
5999 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6000
6001 // Building the generic function prototype.
6002 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6003 CGOpenCLRuntime OpenCLRT(CGM);
6004 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6005 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6006 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6007 llvm::FunctionType *FTy = llvm::FunctionType::get(
6008 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6009
6010 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6011 {Arg0, PacketSize, PacketAlign}));
6012 }
6013
6014 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6015 case Builtin::BIto_global:
6016 case Builtin::BIto_local:
6017 case Builtin::BIto_private: {
6018 auto Arg0 = EmitScalarExpr(E->getArg(0));
6019 auto NewArgT = llvm::PointerType::get(
6020 getLLVMContext(),
6021 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6022 auto NewRetT = llvm::PointerType::get(
6023 getLLVMContext(),
6024 CGM.getContext().getTargetAddressSpace(
6025 E->getType()->getPointeeType().getAddressSpace()));
6026 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6027 llvm::Value *NewArg;
6028 if (Arg0->getType()->getPointerAddressSpace() !=
6029 NewArgT->getPointerAddressSpace())
6030 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6031 else
6032 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6033 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6034 auto NewCall =
6035 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6036 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6037 ConvertType(E->getType())));
6038 }
6039
6040 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6041 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6042 // The code below expands the builtin call to a call to one of the following
6043 // functions that an OpenCL runtime library will have to provide:
6044 // __enqueue_kernel_basic
6045 // __enqueue_kernel_varargs
6046 // __enqueue_kernel_basic_events
6047 // __enqueue_kernel_events_varargs
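// For example (illustrative): a basic four-argument call
//   enqueue_kernel(queue, flags, ndrange, block)
// becomes a call to
//   __enqueue_kernel_basic(queue, flags, &ndrange, kernel_handle, block_ptr),
// where kernel_handle and block_ptr come from emitOpenCLEnqueuedBlock; the other
// variants additionally pass the event wait list / return event and the sizes of
// any variadic local-memory block arguments.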
6048 case Builtin::BIenqueue_kernel: {
6049 StringRef Name; // Generated function call name
6050 unsigned NumArgs = E->getNumArgs();
6051
6052 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6053 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6054 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6055
6056 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6057 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6058 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6059 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6060 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6061
6062 if (NumArgs == 4) {
6063 // The most basic form of the call with parameters:
6064 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6065 Name = "__enqueue_kernel_basic";
6066 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6067 GenericVoidPtrTy};
6068 llvm::FunctionType *FTy = llvm::FunctionType::get(
6069 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6070
6071 auto Info =
6072 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6073 llvm::Value *Kernel =
6074 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6075 llvm::Value *Block =
6076 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6077
6078 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6079 {Queue, Flags, Range, Kernel, Block});
6080 return RValue::get(RTCall);
6081 }
6082 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6083
6084 // Create a temporary array to hold the sizes of local pointer arguments
6085 // for the block. \p First is the position of the first size argument.
6086 auto CreateArrayForSizeVar = [=](unsigned First)
6087 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6088 llvm::APInt ArraySize(32, NumArgs - First);
6089 QualType SizeArrayTy = getContext().getConstantArrayType(
6090 getContext().getSizeType(), ArraySize, nullptr,
6091 ArraySizeModifier::Normal,
6092 /*IndexTypeQuals=*/0);
6093 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6094 llvm::Value *TmpPtr = Tmp.getPointer();
6095 // The EmitLifetime* pair expects a naked Alloca as its last argument;
6096 // however, for cases where the default AS is not the Alloca AS, Tmp is
6097 // actually the Alloca addrspacecasted to the default AS, hence the
6098 // stripPointerCasts().
6099 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6100 llvm::Value *TmpSize = EmitLifetimeStart(
6101 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6102 llvm::Value *ElemPtr;
6103 // Each of the following arguments specifies the size of the corresponding
6104 // argument passed to the enqueued block.
6105 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6106 for (unsigned I = First; I < NumArgs; ++I) {
6107 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6108 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6109 {Zero, Index});
6110 if (I == First)
6111 ElemPtr = GEP;
6112 auto *V =
6113 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6114 Builder.CreateAlignedStore(
6115 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6116 }
6117 // Return the Alloca itself rather than a potential ascast as this is only
6118 // used by the paired EmitLifetimeEnd.
6119 return std::tie(ElemPtr, TmpSize, Alloca);
6120 };
6121
6122 // Could have events and/or varargs.
6123 if (E->getArg(3)->getType()->isBlockPointerType()) {
6124 // No events passed, but has variadic arguments.
6125 Name = "__enqueue_kernel_varargs";
6126 auto Info =
6127 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6128 llvm::Value *Kernel =
6129 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6130 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6131 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6132 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6133
6134 // Create a vector of the arguments, as well as a constant value to
6135 // express to the runtime the number of variadic arguments.
6136 llvm::Value *const Args[] = {Queue, Flags,
6137 Range, Kernel,
6138 Block, ConstantInt::get(IntTy, NumArgs - 4),
6139 ElemPtr};
6140 llvm::Type *const ArgTys[] = {
6141 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6142 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6143
6144 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6145 auto Call = RValue::get(
6146 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6147 if (TmpSize)
6148 EmitLifetimeEnd(TmpSize, TmpPtr);
6149 return Call;
6150 }
6151 // Any calls now have event arguments passed.
6152 if (NumArgs >= 7) {
6153 llvm::PointerType *PtrTy = llvm::PointerType::get(
6154 CGM.getLLVMContext(),
6155 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6156
6157 llvm::Value *NumEvents =
6158 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6159
6160 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6161 // to be a null pointer constant (including `0` literal), we can take it
6162 // into account and emit a null pointer directly.
6163 llvm::Value *EventWaitList = nullptr;
6164 if (E->getArg(4)->isNullPointerConstant(
6165 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6166 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6167 } else {
6168 EventWaitList =
6169 E->getArg(4)->getType()->isArrayType()
6170 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6171 : EmitScalarExpr(E->getArg(4));
6172 // Convert to generic address space.
6173 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6174 }
6175 llvm::Value *EventRet = nullptr;
6176 if (E->getArg(5)->isNullPointerConstant(
6177 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6178 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6179 } else {
6180 EventRet =
6181 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6182 }
6183
6184 auto Info =
6185 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6186 llvm::Value *Kernel =
6187 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6188 llvm::Value *Block =
6189 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6190
6191 std::vector<llvm::Type *> ArgTys = {
6192 QueueTy, Int32Ty, RangeTy, Int32Ty,
6193 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6194
6195 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6196 NumEvents, EventWaitList, EventRet,
6197 Kernel, Block};
6198
6199 if (NumArgs == 7) {
6200 // Has events but no variadics.
6201 Name = "__enqueue_kernel_basic_events";
6202 llvm::FunctionType *FTy = llvm::FunctionType::get(
6203 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6204 return RValue::get(
6205 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6206 llvm::ArrayRef<llvm::Value *>(Args)));
6207 }
6208 // Has event info and variadics
6209 // Pass the number of variadics to the runtime function too.
6210 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6211 ArgTys.push_back(Int32Ty);
6212 Name = "__enqueue_kernel_events_varargs";
6213
6214 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6215 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6216 Args.push_back(ElemPtr);
6217 ArgTys.push_back(ElemPtr->getType());
6218
6219 llvm::FunctionType *FTy = llvm::FunctionType::get(
6220 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6221 auto Call =
6222 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6223 llvm::ArrayRef<llvm::Value *>(Args)));
6224 if (TmpSize)
6225 EmitLifetimeEnd(TmpSize, TmpPtr);
6226 return Call;
6227 }
6228 llvm_unreachable("Unexpected enqueue_kernel signature");
6229 }
6230 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6231 // parameter.
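// For example (illustrative): get_kernel_work_group_size(block) is emitted as
// __get_kernel_work_group_size_impl(kernel, block), with both arguments cast to
// generic-address-space pointers.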
6232 case Builtin::BIget_kernel_work_group_size: {
6233 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6234 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6235 auto Info =
6236 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6237 Value *Kernel =
6238 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6239 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6240 return RValue::get(EmitRuntimeCall(
6241 CGM.CreateRuntimeFunction(
6242 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6243 false),
6244 "__get_kernel_work_group_size_impl"),
6245 {Kernel, Arg}));
6246 }
6247 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6248 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6249 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6250 auto Info =
6251 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6252 Value *Kernel =
6253 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6254 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6255 return RValue::get(EmitRuntimeCall(
6256 CGM.CreateRuntimeFunction(
6257 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6258 false),
6259 "__get_kernel_preferred_work_group_size_multiple_impl"),
6260 {Kernel, Arg}));
6261 }
6262 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6263 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6264 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6265 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6266 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6267 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6268 auto Info =
6269 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6270 Value *Kernel =
6271 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6272 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6273 const char *Name =
6274 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6275 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6276 : "__get_kernel_sub_group_count_for_ndrange_impl";
6277 return RValue::get(EmitRuntimeCall(
6278 CGM.CreateRuntimeFunction(
6279 llvm::FunctionType::get(
6280 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6281 false),
6282 Name),
6283 {NDRange, Kernel, Block}));
6284 }
6285 case Builtin::BI__builtin_store_half:
6286 case Builtin::BI__builtin_store_halff: {
6287 Value *Val = EmitScalarExpr(E->getArg(0));
6288 Address Address = EmitPointerWithAlignment(E->getArg(1));
6289 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6290 Builder.CreateStore(HalfVal, Address);
6291 return RValue::get(nullptr);
6292 }
6293 case Builtin::BI__builtin_load_half: {
6294 Address Address = EmitPointerWithAlignment(E->getArg(0));
6295 Value *HalfVal = Builder.CreateLoad(Address);
6296 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6297 }
6298 case Builtin::BI__builtin_load_halff: {
6299 Address Address = EmitPointerWithAlignment(E->getArg(0));
6300 Value *HalfVal = Builder.CreateLoad(Address);
6301 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6302 }
6303 case Builtin::BI__builtin_printf:
6304 case Builtin::BIprintf:
6305 if (getTarget().getTriple().isNVPTX() ||
6306 getTarget().getTriple().isAMDGCN() ||
6307 (getTarget().getTriple().isSPIRV() &&
6308 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6309 if (getTarget().getTriple().isNVPTX())
6310 return EmitNVPTXDevicePrintfCallExpr(E);
6311 if ((getTarget().getTriple().isAMDGCN() ||
6312 getTarget().getTriple().isSPIRV()) &&
6313 getLangOpts().HIP)
6314 return EmitAMDGPUDevicePrintfCallExpr(E);
6315 }
6316
6317 break;
6318 case Builtin::BI__builtin_canonicalize:
6319 case Builtin::BI__builtin_canonicalizef:
6320 case Builtin::BI__builtin_canonicalizef16:
6321 case Builtin::BI__builtin_canonicalizel:
6322 return RValue::get(
6323 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6324
6325 case Builtin::BI__builtin_thread_pointer: {
6326 if (!getContext().getTargetInfo().isTLSSupported())
6327 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6328 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6329 break;
6330 }
6331 case Builtin::BI__builtin_os_log_format:
6332 return emitBuiltinOSLogFormat(*E);
6333
6334 case Builtin::BI__xray_customevent: {
6335 if (!ShouldXRayInstrumentFunction())
6336 return RValue::getIgnored();
6337
6338 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6339 XRayInstrKind::Custom))
6340 return RValue::getIgnored();
6341
6342 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6343 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6344 return RValue::getIgnored();
6345
6346 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6347 auto FTy = F->getFunctionType();
6348 auto Arg0 = E->getArg(0);
6349 auto Arg0Val = EmitScalarExpr(Arg0);
6350 auto Arg0Ty = Arg0->getType();
6351 auto PTy0 = FTy->getParamType(0);
6352 if (PTy0 != Arg0Val->getType()) {
6353 if (Arg0Ty->isArrayType())
6354 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6355 else
6356 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6357 }
6358 auto Arg1 = EmitScalarExpr(E->getArg(1));
6359 auto PTy1 = FTy->getParamType(1);
6360 if (PTy1 != Arg1->getType())
6361 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6362 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6363 }
6364
6365 case Builtin::BI__xray_typedevent: {
6366 // TODO: There should be a way to always emit events even if the current
6367 // function is not instrumented. Losing events in a stream can cripple
6368 // a trace.
6369 if (!ShouldXRayInstrumentFunction())
6370 return RValue::getIgnored();
6371
6372 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6373 XRayInstrKind::Typed))
6374 return RValue::getIgnored();
6375
6376 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6377 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6378 return RValue::getIgnored();
6379
6380 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6381 auto FTy = F->getFunctionType();
6382 auto Arg0 = EmitScalarExpr(E->getArg(0));
6383 auto PTy0 = FTy->getParamType(0);
6384 if (PTy0 != Arg0->getType())
6385 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6386 auto Arg1 = E->getArg(1);
6387 auto Arg1Val = EmitScalarExpr(Arg1);
6388 auto Arg1Ty = Arg1->getType();
6389 auto PTy1 = FTy->getParamType(1);
6390 if (PTy1 != Arg1Val->getType()) {
6391 if (Arg1Ty->isArrayType())
6392 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6393 else
6394 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6395 }
6396 auto Arg2 = EmitScalarExpr(E->getArg(2));
6397 auto PTy2 = FTy->getParamType(2);
6398 if (PTy2 != Arg2->getType())
6399 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6400 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6401 }
6402
6403 case Builtin::BI__builtin_ms_va_start:
6404 case Builtin::BI__builtin_ms_va_end:
6405 return RValue::get(
6406 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6407 BuiltinID == Builtin::BI__builtin_ms_va_start));
6408
6409 case Builtin::BI__builtin_ms_va_copy: {
6410 // Lower this manually. We can't reliably determine whether or not any
6411 // given va_copy() is for a Win64 va_list from the calling convention
6412 // alone, because it's legal to do this from a System V ABI function.
6413 // With opaque pointer types, we won't have enough information in LLVM
6414 // IR to determine this from the argument types, either. Best to do it
6415 // now, while we have enough information.
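// The copy itself is therefore just a pointer-sized load of the source va_list
// value followed by a store to the destination, as emitted below.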
6416 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6417 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6418
6419 DestAddr = DestAddr.withElementType(Int8PtrTy);
6420 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6421
6422 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6423 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6424 }
6425
6426 case Builtin::BI__builtin_get_device_side_mangled_name: {
6427 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6428 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6429 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6430 return RValue::get(Str.getPointer());
6431 }
6432 }
6433
6434 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6435 // the call using the normal call path, but using the unmangled
6436 // version of the function name.
6437 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6438 return emitLibraryCall(*this, FD, E,
6439 CGM.getBuiltinLibFunction(FD, BuiltinID));
6440
6441 // If this is a predefined lib function (e.g. malloc), emit the call
6442 // using exactly the normal call path.
6443 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6444 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6445
6446 // Check that a call to a target specific builtin has the correct target
6447 // features.
6448 // This is down here to avoid non-target specific builtins, however, if
6449 // generic builtins start to require generic target features then we
6450 // can move this up to the beginning of the function.
6451 checkTargetFeatures(E, FD);
6452
6453 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6454 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6455
6456 // See if we have a target specific intrinsic.
6457 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6458 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6459 StringRef Prefix =
6460 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6461 if (!Prefix.empty()) {
6462 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6463 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6464 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6465 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6466 // NOTE: we don't need to perform a compatibility flag check here since the
6467 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
6468 // the MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
6469 if (IntrinsicID == Intrinsic::not_intrinsic)
6470 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6471 }
6472
6473 if (IntrinsicID != Intrinsic::not_intrinsic) {
6474 SmallVector<Value*, 16> Args;
6475
6476 // Find out if any arguments are required to be integer constant
6477 // expressions.
6478 unsigned ICEArguments = 0;
6479 ASTContext::GetBuiltinTypeError Error;
6480 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6481 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6482
6483 Function *F = CGM.getIntrinsic(IntrinsicID);
6484 llvm::FunctionType *FTy = F->getFunctionType();
6485
6486 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6487 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6488 // If the intrinsic arg type is different from the builtin arg type
6489 // we need to do a bit cast.
6490 llvm::Type *PTy = FTy->getParamType(i);
6491 if (PTy != ArgValue->getType()) {
6492 // XXX - vector of pointers?
6493 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6494 if (PtrTy->getAddressSpace() !=
6495 ArgValue->getType()->getPointerAddressSpace()) {
6496 ArgValue = Builder.CreateAddrSpaceCast(
6497 ArgValue, llvm::PointerType::get(getLLVMContext(),
6498 PtrTy->getAddressSpace()));
6499 }
6500 }
6501
6502 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6503 // in AMX intrinsics.
6504 if (PTy->isX86_AMXTy())
6505 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6506 {ArgValue->getType()}, {ArgValue});
6507 else
6508 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6509 }
6510
6511 Args.push_back(ArgValue);
6512 }
6513
6514 Value *V = Builder.CreateCall(F, Args);
6515 QualType BuiltinRetType = E->getType();
6516
6517 llvm::Type *RetTy = VoidTy;
6518 if (!BuiltinRetType->isVoidType())
6519 RetTy = ConvertType(BuiltinRetType);
6520
6521 if (RetTy != V->getType()) {
6522 // XXX - vector of pointers?
6523 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6524 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6525 V = Builder.CreateAddrSpaceCast(
6526 V, llvm::PointerType::get(getLLVMContext(),
6527 PtrTy->getAddressSpace()));
6528 }
6529 }
6530
6531 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6532 // in AMX intrinsics.
6533 if (V->getType()->isX86_AMXTy())
6534 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6535 {V});
6536 else
6537 V = Builder.CreateBitCast(V, RetTy);
6538 }
6539
6540 if (RetTy->isVoidTy())
6541 return RValue::get(nullptr);
6542
6543 return RValue::get(V);
6544 }
6545
6546 // Some target-specific builtins can have aggregate return values, e.g.
6547 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6548 // ReturnValue to be non-null, so that the target-specific emission code can
6549 // always just emit into it.
6550 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6551 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6552 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6553 ReturnValue = ReturnValueSlot(DestPtr, false);
6554 }
6555
6556 // Now see if we can emit a target-specific builtin.
6557 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6558 switch (EvalKind) {
6559 case TEK_Scalar:
6560 if (V->getType()->isVoidTy())
6561 return RValue::get(nullptr);
6562 return RValue::get(V);
6563 case TEK_Aggregate:
6564 return RValue::getAggregate(ReturnValue.getAddress(),
6565 ReturnValue.isVolatile());
6566 case TEK_Complex:
6567 llvm_unreachable("No current target builtin returns complex");
6568 }
6569 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6570 }
6571
6572 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6573 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6574 switch (EvalKind) {
6575 case TEK_Scalar:
6576 if (V->getType()->isVoidTy())
6577 return RValue::get(nullptr);
6578 return RValue::get(V);
6579 case TEK_Aggregate:
6580 return RValue::getAggregate(ReturnValue.getAddress(),
6581 ReturnValue.isVolatile());
6582 case TEK_Complex:
6583 llvm_unreachable("No current hlsl builtin returns complex");
6584 }
6585 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6586 }
6587
6588 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6589 return EmitHipStdParUnsupportedBuiltin(this, FD);
6590
6591 ErrorUnsupported(E, "builtin function");
6592
6593 // Unknown builtin, for now just dump it out and return undef.
6594 return GetUndefRValue(E->getType());
6595}
6596
6597static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6598 unsigned BuiltinID, const CallExpr *E,
6599 ReturnValueSlot ReturnValue,
6600 llvm::Triple::ArchType Arch) {
6601 // When compiling in HipStdPar mode we have to be conservative in rejecting
6602 // target specific features in the FE, and defer the possible error to the
6603 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6604 // referenced by an accelerator executable function, we emit an error.
6605 // Returning nullptr here leads to the builtin being handled in
6606 // EmitStdParUnsupportedBuiltin.
6607 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6608 Arch != CGF->getTarget().getTriple().getArch())
6609 return nullptr;
6610
6611 switch (Arch) {
6612 case llvm::Triple::arm:
6613 case llvm::Triple::armeb:
6614 case llvm::Triple::thumb:
6615 case llvm::Triple::thumbeb:
6616 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6617 case llvm::Triple::aarch64:
6618 case llvm::Triple::aarch64_32:
6619 case llvm::Triple::aarch64_be:
6620 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6621 case llvm::Triple::bpfeb:
6622 case llvm::Triple::bpfel:
6623 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6624 case llvm::Triple::x86:
6625 case llvm::Triple::x86_64:
6626 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6627 case llvm::Triple::ppc:
6628 case llvm::Triple::ppcle:
6629 case llvm::Triple::ppc64:
6630 case llvm::Triple::ppc64le:
6631 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6632 case llvm::Triple::r600:
6633 case llvm::Triple::amdgcn:
6634 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6635 case llvm::Triple::systemz:
6636 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6637 case llvm::Triple::nvptx:
6638 case llvm::Triple::nvptx64:
6639 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6640 case llvm::Triple::wasm32:
6641 case llvm::Triple::wasm64:
6642 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6643 case llvm::Triple::hexagon:
6644 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6645 case llvm::Triple::riscv32:
6646 case llvm::Triple::riscv64:
6647 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6648 case llvm::Triple::spirv:
6649 return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
6650 case llvm::Triple::spirv64:
6651 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6652 return nullptr;
6653 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6654 default:
6655 return nullptr;
6656 }
6657}
6658
6659Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6660 const CallExpr *E,
6661 ReturnValueSlot ReturnValue) {
6662 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6663 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6664 return EmitTargetArchBuiltinExpr(
6665 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6666 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6667 }
6668
6669 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6670 getTarget().getTriple().getArch());
6671}
6672
6673static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6674 NeonTypeFlags TypeFlags,
6675 bool HasLegalHalfType = true,
6676 bool V1Ty = false,
6677 bool AllowBFloatArgsAndRet = true) {
6678 int IsQuad = TypeFlags.isQuad();
6679 switch (TypeFlags.getEltType()) {
6680 case NeonTypeFlags::Int8:
6681 case NeonTypeFlags::Poly8:
6682 case NeonTypeFlags::MFloat8:
6683 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6684 case NeonTypeFlags::Int16:
6685 case NeonTypeFlags::Poly16:
6686 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6687 case NeonTypeFlags::BFloat16:
6688 if (AllowBFloatArgsAndRet)
6689 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6690 else
6691 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6692 case NeonTypeFlags::Float16:
6693 if (HasLegalHalfType)
6694 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6695 else
6696 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6697 case NeonTypeFlags::Int32:
6698 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6699 case NeonTypeFlags::Int64:
6700 case NeonTypeFlags::Poly64:
6701 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6702 case NeonTypeFlags::Poly128:
6703 // FIXME: i128 and f128 are not fully supported in Clang and LLVM.
6704 // A lot of the i128 and f128 API is missing,
6705 // so we use v16i8 to represent poly128 and get it pattern matched.
6706 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6707 case NeonTypeFlags::Float32:
6708 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6709 case NeonTypeFlags::Float64:
6710 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6711 }
6712 llvm_unreachable("Unknown vector element type!");
6713}
6714
6715static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6716 NeonTypeFlags IntTypeFlags) {
6717 int IsQuad = IntTypeFlags.isQuad();
6718 switch (IntTypeFlags.getEltType()) {
6719 case NeonTypeFlags::Int16:
6720 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6721 case NeonTypeFlags::Int32:
6722 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6723 case NeonTypeFlags::Int64:
6724 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6725 default:
6726 llvm_unreachable("Type can't be converted to floating-point!");
6727 }
6728}
6729
6730Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6731 const ElementCount &Count) {
6732 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6733 return Builder.CreateShuffleVector(V, V, SV, "lane");
6734}
6735
6736Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6737 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6738 return EmitNeonSplat(V, C, EC);
6739}
6740
6741Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6742 const char *name,
6743 unsigned shift, bool rightshift) {
6744 unsigned j = 0;
6745 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6746 ai != ae; ++ai, ++j) {
6747 if (F->isConstrainedFPIntrinsic())
6748 if (ai->getType()->isMetadataTy())
6749 continue;
6750 if (shift > 0 && shift == j)
6751 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6752 else
6753 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6754 }
6755
6756 if (F->isConstrainedFPIntrinsic())
6757 return Builder.CreateConstrainedFPCall(F, Ops, name);
6758 else
6759 return Builder.CreateCall(F, Ops, name);
6760}
6761
6762Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
6763 ArrayRef<llvm::Type *> Tys,
6764 SmallVectorImpl<Value *> &Ops,
6765 const CallExpr *E, const char *name) {
6766 llvm::Value *FPM =
6767 EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
6768 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
6769 return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
6770}
6771
6772llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
6773 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
6774 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
6775
6776 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
6777 RetTy->getPrimitiveSizeInBits();
6778 llvm::Type *Tys[] = {llvm::FixedVectorType::get(RetTy, ElemCount),
6779 Ops[1]->getType()};
6780 if (ExtendLaneArg) {
6781 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
6782 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
6783 Builder.getInt64(0));
6784 }
6785 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
6786}
6787
6788llvm::Value *CodeGenFunction::EmitFP8NeonFMLACall(
6789 unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy,
6790 SmallVectorImpl<llvm::Value *> &Ops, const CallExpr *E, const char *name) {
6791
6792 if (ExtendLaneArg) {
6793 auto *VT = llvm::FixedVectorType::get(Int8Ty, 16);
6794 Ops[2] = Builder.CreateInsertVector(VT, PoisonValue::get(VT), Ops[2],
6795 Builder.getInt64(0));
6796 }
6797 const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
6798 RetTy->getPrimitiveSizeInBits();
6799 return EmitFP8NeonCall(IID, {llvm::FixedVectorType::get(RetTy, ElemCount)},
6800 Ops, E, name);
6801}
6802
6803Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6804 bool neg) {
6805 int SV = cast<ConstantInt>(V)->getSExtValue();
6806 return ConstantInt::get(Ty, neg ? -SV : SV);
6807}
6808
6809Value *CodeGenFunction::EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0,
6810 llvm::Type *Ty1, bool Extract,
6811 SmallVectorImpl<llvm::Value *> &Ops,
6812 const CallExpr *E,
6813 const char *name) {
6814 llvm::Type *Tys[] = {Ty0, Ty1};
6815 if (Extract) {
6816 // Op[0] is mfloat8x16_t, but the intrinsic converts only the lower part of
6817 // the vector.
6818 Tys[1] = llvm::FixedVectorType::get(Int8Ty, 8);
6819 Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
6820 }
6821 return EmitFP8NeonCall(IID, Tys, Ops, E, name);
6822}
6823
6824// Right-shift a vector by a constant.
6825Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6826 llvm::Type *Ty, bool usgn,
6827 const char *name) {
6828 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6829
6830 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6831 int EltSize = VTy->getScalarSizeInBits();
6832
6833 Vec = Builder.CreateBitCast(Vec, Ty);
6834
6835 // lshr/ashr are undefined when the shift amount is equal to the vector
6836 // element size.
6837 if (ShiftAmt == EltSize) {
6838 if (usgn) {
6839 // Right-shifting an unsigned value by its size yields 0.
6840 return llvm::ConstantAggregateZero::get(VTy);
6841 } else {
6842 // Right-shifting a signed value by its size is equivalent
6843 // to a shift of size-1.
6844 --ShiftAmt;
6845 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6846 }
6847 }
6848
6849 Shift = EmitNeonShiftVector(Shift, Ty, false);
6850 if (usgn)
6851 return Builder.CreateLShr(Vec, Shift, name);
6852 else
6853 return Builder.CreateAShr(Vec, Shift, name);
6854}
6855
6856enum {
6857 AddRetType = (1 << 0),
6858 Add1ArgType = (1 << 1),
6859 Add2ArgTypes = (1 << 2),
6860
6861 VectorizeRetType = (1 << 3),
6862 VectorizeArgTypes = (1 << 4),
6863
6864 InventFloatType = (1 << 5),
6865 UnsignedAlts = (1 << 6),
6866
6867 Use64BitVectors = (1 << 7),
6868 Use128BitVectors = (1 << 8),
6869
6870 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6871 VectorRet = AddRetType | VectorizeRetType,
6872 VectorRetGetArgs01 =
6873 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6874 FpCmpzModifiers =
6875 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6876};
6877
6878namespace {
6879struct ARMVectorIntrinsicInfo {
6880 const char *NameHint;
6881 unsigned BuiltinID;
6882 unsigned LLVMIntrinsic;
6883 unsigned AltLLVMIntrinsic;
6884 unsigned TypeModifier;
6885
6886 bool operator<(unsigned RHSBuiltinID) const {
6887 return BuiltinID < RHSBuiltinID;
6888 }
6889 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6890 return BuiltinID < TE.BuiltinID;
6891 }
6892};
6893} // end anonymous namespace
6894
6895#define NEONMAP0(NameBase) \
6896 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6897
6898#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6899 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6900 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6901
6902#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6903 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6904 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6905 TypeModifier }
6906
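// For example (illustrative): NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
// { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// i.e. a single-intrinsic mapping with no type-modifier flags.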
6907static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6908 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6909 NEONMAP0(splat_lane_v),
6910 NEONMAP0(splat_laneq_v),
6911 NEONMAP0(splatq_lane_v),
6912 NEONMAP0(splatq_laneq_v),
6913 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6914 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6915 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6916 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6917 NEONMAP0(vadd_v),
6918 NEONMAP0(vaddhn_v),
6919 NEONMAP0(vaddq_v),
6920 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6921 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6922 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6923 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6924 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6925 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6926 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6927 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6928 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6929 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6930 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6931 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6932 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6933 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6934 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6935 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6936 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6937 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6938 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6939 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6940 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6941 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6942 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6943 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6944 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6945 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6946 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6947 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6948 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6949 NEONMAP0(vceqz_v),
6950 NEONMAP0(vceqzq_v),
6951 NEONMAP0(vcgez_v),
6952 NEONMAP0(vcgezq_v),
6953 NEONMAP0(vcgtz_v),
6954 NEONMAP0(vcgtzq_v),
6955 NEONMAP0(vclez_v),
6956 NEONMAP0(vclezq_v),
6957 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6958 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6959 NEONMAP0(vcltz_v),
6960 NEONMAP0(vcltzq_v),
6961 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6962 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6963 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6964 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6965 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6966 NEONMAP0(vcvt_f16_s16),
6967 NEONMAP0(vcvt_f16_u16),
6968 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6969 NEONMAP0(vcvt_f32_v),
6970 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6971 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6972 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6973 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6974 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6975 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6976 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6977 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6978 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6979 NEONMAP0(vcvt_s16_f16),
6980 NEONMAP0(vcvt_s32_v),
6981 NEONMAP0(vcvt_s64_v),
6982 NEONMAP0(vcvt_u16_f16),
6983 NEONMAP0(vcvt_u32_v),
6984 NEONMAP0(vcvt_u64_v),
6985 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6986 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6987 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6988 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6989 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6990 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6991 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6992 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6993 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6994 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6995 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6996 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6997 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6998 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6999 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
7000 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
7001 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
7002 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
7003 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
7004 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
7005 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
7006 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
7007 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
7008 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
7009 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
7010 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
7011 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
7012 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
7013 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
7014 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
7015 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
7016 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
7017 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
7018 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
7019 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
7020 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
7021 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
7022 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
7023 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
7024 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
7025 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
7026 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
7027 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
7028 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
7029 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
7030 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
7031 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
7032 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
7033 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
7034 NEONMAP0(vcvtq_f16_s16),
7035 NEONMAP0(vcvtq_f16_u16),
7036 NEONMAP0(vcvtq_f32_v),
7037 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7038 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7039 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7040 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7041 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7042 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7043 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7044 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7045 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7046 NEONMAP0(vcvtq_s16_f16),
7047 NEONMAP0(vcvtq_s32_v),
7048 NEONMAP0(vcvtq_s64_v),
7049 NEONMAP0(vcvtq_u16_f16),
7050 NEONMAP0(vcvtq_u32_v),
7051 NEONMAP0(vcvtq_u64_v),
7052 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
7053 NEONMAP1(vdot_u32, arm_neon_udot, 0),
7054 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
7055 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7056 NEONMAP0(vext_v),
7057 NEONMAP0(vextq_v),
7058 NEONMAP0(vfma_v),
7059 NEONMAP0(vfmaq_v),
7060 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7061 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7062 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7063 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7064 NEONMAP0(vld1_dup_v),
7065 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7066 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7067 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7068 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7069 NEONMAP0(vld1q_dup_v),
7070 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7071 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7072 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7073 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7074 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7075 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7076 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7077 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7078 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7079 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7080 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7081 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7082 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7083 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7084 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7085 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7086 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7087 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7088 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7089 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7090 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7091 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7092 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7093 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7094 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7095 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7096 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7097 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7098 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7099 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7100 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7101 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7102 NEONMAP0(vmovl_v),
7103 NEONMAP0(vmovn_v),
7104 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7105 NEONMAP0(vmull_v),
7106 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7107 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7108 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7109 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7110 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7111 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7112 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7113 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7114 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7115 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7116 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7117 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7118 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7119 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7120 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7121 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7122 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7123 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7124 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7125 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7126 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7127 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7128 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7129 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7130 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7131 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7132 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7133 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7134 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7135 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7136 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7137 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7138 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7139 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7140 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7141 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7142 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7143 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7144 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7145 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7146 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7147 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7148 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7149 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7150 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7151 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7152 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7153 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7154 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7155 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7156 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7157 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7158 NEONMAP0(vrndi_v),
7159 NEONMAP0(vrndiq_v),
7160 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7161 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7162 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7163 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7164 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7165 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7166 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7167 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7168 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7169 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7170 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7171 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7172 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7173 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7174 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7175 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7176 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7177 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7178 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7179 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7180 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7181 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7182 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7183 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7184 NEONMAP0(vshl_n_v),
7185 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7186 NEONMAP0(vshll_n_v),
7187 NEONMAP0(vshlq_n_v),
7188 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7189 NEONMAP0(vshr_n_v),
7190 NEONMAP0(vshrn_n_v),
7191 NEONMAP0(vshrq_n_v),
7192 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7193 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7194 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7195 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7196 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7197 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7198 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7199 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7200 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7201 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7202 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7203 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7204 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7205 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7206 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7207 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7208 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7209 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7210 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7211 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7212 NEONMAP0(vsubhn_v),
7213 NEONMAP0(vtrn_v),
7214 NEONMAP0(vtrnq_v),
7215 NEONMAP0(vtst_v),
7216 NEONMAP0(vtstq_v),
7217 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7218 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7219 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7220 NEONMAP0(vuzp_v),
7221 NEONMAP0(vuzpq_v),
7222 NEONMAP0(vzip_v),
7223 NEONMAP0(vzipq_v)
7224};
7225
7226static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7227 NEONMAP0(splat_lane_v),
7228 NEONMAP0(splat_laneq_v),
7229 NEONMAP0(splatq_lane_v),
7230 NEONMAP0(splatq_laneq_v),
7231 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7232 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7233 NEONMAP0(vadd_v),
7234 NEONMAP0(vaddhn_v),
7235 NEONMAP0(vaddq_p128),
7236 NEONMAP0(vaddq_v),
7237 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7238 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7239 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7240 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7241 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7242 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7243 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7244 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7245 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7246 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7247 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7248 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7249 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7250 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7251 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7252 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7253 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7254 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7255 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7256 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7257 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7258 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7259 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7260 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7261 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7262 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7263 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7264 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7265 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7266 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7267 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7268 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7269 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7270 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7271 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7272 NEONMAP0(vceqz_v),
7273 NEONMAP0(vceqzq_v),
7274 NEONMAP0(vcgez_v),
7275 NEONMAP0(vcgezq_v),
7276 NEONMAP0(vcgtz_v),
7277 NEONMAP0(vcgtzq_v),
7278 NEONMAP0(vclez_v),
7279 NEONMAP0(vclezq_v),
7280 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7281 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7282 NEONMAP0(vcltz_v),
7283 NEONMAP0(vcltzq_v),
7284 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7285 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7286 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7287 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7288 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7289 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7290 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7291 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7292 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7293 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7294 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7295 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7296 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7297 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7298 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7299 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7300 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7301 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7302 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7303 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7304 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7305 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7306 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7307 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7308 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7309 NEONMAP0(vcvt_f16_s16),
7310 NEONMAP0(vcvt_f16_u16),
7311 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7312 NEONMAP0(vcvt_f32_v),
7313 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7314 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7315 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7316 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7317 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7318 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7319 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7320 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7321 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7322 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7323 NEONMAP0(vcvtq_f16_s16),
7324 NEONMAP0(vcvtq_f16_u16),
7325 NEONMAP0(vcvtq_f32_v),
7326 NEONMAP0(vcvtq_high_bf16_f32),
7327 NEONMAP0(vcvtq_low_bf16_f32),
7328 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7329 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7330 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7331 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7332 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7333 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7334 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7335 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7336 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7337 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7338 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7339 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7340 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7341 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7342 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7343 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7344 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7345 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7346 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7347 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7348 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7349 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7350 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7351 NEONMAP0(vext_v),
7352 NEONMAP0(vextq_v),
7353 NEONMAP0(vfma_v),
7354 NEONMAP0(vfmaq_v),
7355 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7356 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7357 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7358 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7359 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7360 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7361 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7362 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7363 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7364 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7365 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7366 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7367 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7368 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7369 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7370 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7371 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7372 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7373 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7374 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7375 NEONMAP0(vmovl_v),
7376 NEONMAP0(vmovn_v),
7377 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7378 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7379 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7380 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7381 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7382 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7383 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7384 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7385 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7386 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7387 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7388 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7389 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7390 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7391 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7392 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7393 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7394 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7395 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7396 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7397 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7398 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7399 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7400 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7401 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7402 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7403 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7404 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7405 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7406 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7407 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7408 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7409 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7410 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7411 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7412 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7413 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7414 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7415 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7416 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7417 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7418 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7419 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7420 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7421 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7422 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7423 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7424 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7425 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7426 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7427 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7428 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7429 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7430 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7431 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7432 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7433 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7434 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7435 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7436 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7437 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7438 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7439 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7440 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7441 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7442 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7443 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7444 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7445 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7446 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7447 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7448 NEONMAP0(vrndi_v),
7449 NEONMAP0(vrndiq_v),
7450 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7451 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7452 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7453 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7454 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7455 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7456 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7457 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7458 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7459 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7460 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7461 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7462 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7463 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7464 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7465 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7466 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7467 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7468 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7469 NEONMAP0(vshl_n_v),
7470 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7471 NEONMAP0(vshll_n_v),
7472 NEONMAP0(vshlq_n_v),
7473 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7474 NEONMAP0(vshr_n_v),
7475 NEONMAP0(vshrn_n_v),
7476 NEONMAP0(vshrq_n_v),
7477 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7478 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7479 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7480 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7481 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7482 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7483 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7484 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7485 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7486 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7487 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7488 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7489 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7490 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7491 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7492 NEONMAP0(vsubhn_v),
7493 NEONMAP0(vtst_v),
7494 NEONMAP0(vtstq_v),
7495 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7496 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7497 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7498 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7499};
7500
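// Scalar (SISD) AArch64 builtins. Many entries reuse a vector intrinsic via the
// Vectorize1ArgType/VectorRet and Use64BitVectors/Use128BitVectors modifiers,
// which promote the scalar operand or result to a fixed vector; the scalar
// value travels in lane 0 (see EmitCommonNeonSISDBuiltinExpr below).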
7501static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7502 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7503 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7504 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7505 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7506 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7507 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7508 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7509 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7510 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7511 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7512 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7513 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7514 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7515 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7516 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7517 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7518 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7519 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7520 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7521 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7522 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7523 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7524 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7525 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7526 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7527 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7528 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7529 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7530 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7531 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7532 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7533 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7534 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7535 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7536 NEONMAP0(vcvth_bf16_f32),
7537 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7538 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7539 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7540 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7541 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7542 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7543 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7544 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7545 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7546 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7547 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7548 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7549 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7550 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7551 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7552 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7553 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7554 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7555 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7556 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7557 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7558 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7559 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7560 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7561 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7562 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7563 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7564 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7565 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7566 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7567 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7568 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7569 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7570 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7571 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7572 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7573 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7574 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7575 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7576 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7577 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7578 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7579 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7580 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7581 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7582 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7583 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7584 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7585 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7586 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7587 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7588 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7589 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7590 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7591 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7592 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7593 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7594 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7595 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7596 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7597 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7598 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7599 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7600 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7601 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7602 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7603 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7604 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7605 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7606 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7607 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7608 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7609 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7610 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7611 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7612 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7613 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7614 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7615 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7616 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7617 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7618 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7619 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7620 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7621 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7622 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7623 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7624 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7625 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7626 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7627 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7628 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7629 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7630 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7631 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7632 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7633 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7634 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7635 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7636 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7637 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7638 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7639 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7640 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7641 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7642 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7643 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7644 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7645 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7646 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7647 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7648 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7649 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7650 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7651 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7652 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7653 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7654 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7655 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7656 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7657 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7658 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7659 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7660 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7661 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7662 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7663 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7664 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7665 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7666 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7667 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7668 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7669 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7670 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7671 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7672 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7673 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7674 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7675 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7676 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7677 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7678 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7679 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7680 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7681 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7682 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7683 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7684 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7685 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7686 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7687 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7688 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7689 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7690 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7691 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7692 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7693 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7694 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7695 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7696 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7697 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7698 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7699 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7700 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7701 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7702 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7703 // FP16 scalar intrinsics go here.
7704 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7705 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7706 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7707 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7708 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7709 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7710 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7711 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7712 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7713 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7714 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7715 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7716 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7717 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7718 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7719 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7720 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7721 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7722 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7723 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7724 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7725 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7726 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7727 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7728 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7729 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7730 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7731 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7732 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7733 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7734 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7735 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7736 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7737 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7738};
7739
7740// Some intrinsics are equivalent for codegen.
7741static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7742 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7743 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7744 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7745 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7746 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7747 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7748 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7749 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7750 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7751 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7752 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7753 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7754 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7755 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7756 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7757 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7758 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7759 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7760 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7761 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7762 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7763 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7764 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7765 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7766 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7767 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7768 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7769 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7770 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7771 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7772 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7773 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7774 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7775 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7776 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7777 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7778 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7779 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7780 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7781 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7782 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7783 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7784 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7785 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7786 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7787 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7788 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7789 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7790 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7791 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7792 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7793 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7794 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7795 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7796 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7797 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7798 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7799 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7800 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7801 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7802 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7803 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7804 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7805 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7806 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7807 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7808 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7809 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7810 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7811 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7812 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7813 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7814 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7815 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7816 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7817 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7818 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7819 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7820 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7821 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7822 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7823 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7824 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7825 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7826 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7827 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7828 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7829 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7830 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7831 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7832 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7833 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7834 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7835 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7836 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7837 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7838 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7839 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7840 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7841 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7842 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7843 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7844 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7845 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7846 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7847 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7848 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7849 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7850 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7851 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7852 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7853 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7854 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7855 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7856 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7857 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7858 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7859 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7860 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7861 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7862 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7863 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7864 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7865 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7866 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7867 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7868 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7869 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7870 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7871 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7872 // arbitrary one to be handled as the canonical variation.
7873 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7874 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7875 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7876 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7877 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7878 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7879 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7880 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7881 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7882 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7883 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7884 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7885};
7886
7887#undef NEONMAP0
7888#undef NEONMAP1
7889#undef NEONMAP2
7890
7891#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7892 { \
7893 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7894 TypeModifier \
7895 }
7896
7897#define SVEMAP2(NameBase, TypeModifier) \
7898 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
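// SVEMAP1 maps a builtin to an LLVM intrinsic plus type-modifier flags;
// SVEMAP2 records a builtin with no direct intrinsic (LLVMIntrinsic == 0),
// i.e. one handled by custom codegen. The table below is populated from the
// generated arm_sve_builtin_cg.inc plus the NEON-SVE bridge builtins.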
7899static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7900#define GET_SVE_LLVM_INTRINSIC_MAP
7901#include "clang/Basic/arm_sve_builtin_cg.inc"
7902#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7903#undef GET_SVE_LLVM_INTRINSIC_MAP
7904};
7905
7906#undef SVEMAP1
7907#undef SVEMAP2
7908
7909#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7910 { \
7911 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7912 TypeModifier \
7913 }
7914
7915#define SMEMAP2(NameBase, TypeModifier) \
7916 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7917static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7918#define GET_SME_LLVM_INTRINSIC_MAP
7919#include "clang/Basic/arm_sme_builtin_cg.inc"
7920#undef GET_SME_LLVM_INTRINSIC_MAP
7921};
7922
7923#undef SMEMAP1
7924#undef SMEMAP2
7925
7926 static bool NEONSIMDIntrinsicsProvenSorted = false;
7927
7928 static bool AArch64SIMDIntrinsicsProvenSorted = false;
7929 static bool AArch64SISDIntrinsicsProvenSorted = false;
7930 static bool AArch64SVEIntrinsicsProvenSorted = false;
7931 static bool AArch64SMEIntrinsicsProvenSorted = false;
7932
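// Looks up BuiltinID in one of the tables above by binary search; the first
// lookup into each table verifies (under assertions) that the table is sorted.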
7933static const ARMVectorIntrinsicInfo *
7934 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7935 unsigned BuiltinID, bool &MapProvenSorted) {
7936
7937#ifndef NDEBUG
7938 if (!MapProvenSorted) {
7939 assert(llvm::is_sorted(IntrinsicMap));
7940 MapProvenSorted = true;
7941 }
7942#endif
7943
7944 const ARMVectorIntrinsicInfo *Builtin =
7945 llvm::lower_bound(IntrinsicMap, BuiltinID);
7946
7947 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7948 return Builtin;
7949
7950 return nullptr;
7951}
7952
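// Builds the overload type list for the requested intrinsic from the
// TypeModifier bits: AddRetType appends the call's return type,
// Add1ArgType/Add2ArgTypes append the argument type once or twice, the
// Vectorize* bits wrap types in a 64- or 128-bit fixed vector, and
// InventFloatType appends 'float'.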
7953 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7954 unsigned Modifier,
7955 llvm::Type *ArgType,
7956 const CallExpr *E) {
7957 int VectorSize = 0;
7958 if (Modifier & Use64BitVectors)
7959 VectorSize = 64;
7960 else if (Modifier & Use128BitVectors)
7961 VectorSize = 128;
7962
7963 // Return type.
7964 SmallVector<llvm::Type *, 3> Tys;
7965 if (Modifier & AddRetType) {
7966 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7967 if (Modifier & VectorizeRetType)
7968 Ty = llvm::FixedVectorType::get(
7969 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7970
7971 Tys.push_back(Ty);
7972 }
7973
7974 // Arguments.
7975 if (Modifier & VectorizeArgTypes) {
7976 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7977 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7978 }
7979
7980 if (Modifier & (Add1ArgType | Add2ArgTypes))
7981 Tys.push_back(ArgType);
7982
7983 if (Modifier & Add2ArgTypes)
7984 Tys.push_back(ArgType);
7985
7986 if (Modifier & InventFloatType)
7987 Tys.push_back(FloatTy);
7988
7989 return CGM.getIntrinsic(IntrinsicID, Tys);
7990}
7991
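// Common codegen for scalar (SISD) builtins that are implemented with vector
// intrinsics: scalar operands narrower than the intrinsic's parameters are
// inserted into lane 0 of a poison vector, and lane 0 is extracted again when
// the intrinsic returns a wider vector than the builtin's result type.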
7992 static Value *EmitCommonNeonSISDBuiltinExpr(
7993 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7994 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7995 unsigned BuiltinID = SISDInfo.BuiltinID;
7996 unsigned int Int = SISDInfo.LLVMIntrinsic;
7997 unsigned Modifier = SISDInfo.TypeModifier;
7998 const char *s = SISDInfo.NameHint;
7999
8000 switch (BuiltinID) {
8001 case NEON::BI__builtin_neon_vcled_s64:
8002 case NEON::BI__builtin_neon_vcled_u64:
8003 case NEON::BI__builtin_neon_vcles_f32:
8004 case NEON::BI__builtin_neon_vcled_f64:
8005 case NEON::BI__builtin_neon_vcltd_s64:
8006 case NEON::BI__builtin_neon_vcltd_u64:
8007 case NEON::BI__builtin_neon_vclts_f32:
8008 case NEON::BI__builtin_neon_vcltd_f64:
8009 case NEON::BI__builtin_neon_vcales_f32:
8010 case NEON::BI__builtin_neon_vcaled_f64:
8011 case NEON::BI__builtin_neon_vcalts_f32:
8012 case NEON::BI__builtin_neon_vcaltd_f64:
8013 // Only one direction of comparisons actually exists; cmle is actually a cmge
8014 // with swapped operands. The table gives us the right intrinsic but we
8015 // still need to do the swap.
8016 std::swap(Ops[0], Ops[1]);
8017 break;
8018 }
8019
8020 assert(Int && "Generic code assumes a valid intrinsic");
8021
8022 // Determine the type(s) of this overloaded AArch64 intrinsic.
8023 const Expr *Arg = E->getArg(0);
8024 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
8025 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
8026
8027 int j = 0;
8028 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
8029 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
8030 ai != ae; ++ai, ++j) {
8031 llvm::Type *ArgTy = ai->getType();
8032 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
8033 ArgTy->getPrimitiveSizeInBits())
8034 continue;
8035
8036 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
8037 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8038 // it before inserting.
8039 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
8040 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
8041 Ops[j] =
8042 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
8043 }
8044
8045 Value *Result = CGF.EmitNeonCall(F, Ops, s);
8046 llvm::Type *ResultType = CGF.ConvertType(E->getType());
8047 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
8048 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
8049 return CGF.Builder.CreateExtractElement(Result, C0);
8050
8051 return CGF.Builder.CreateBitCast(Result, ResultType, s);
8052}
8053
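// Codegen shared between the 32-bit ARM and AArch64 NEON builtins. The last
// builtin argument is an implicit constant holding the NeonTypeFlags, which
// encode the element type, signedness, and 64- vs 128-bit width; those flags
// select the concrete intrinsic overload (and, with UnsignedAlts, whether the
// signed or unsigned intrinsic is used).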
8054 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
8055 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
8056 const char *NameHint, unsigned Modifier, const CallExpr *E,
8057 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8058 llvm::Triple::ArchType Arch) {
8059 // Get the last argument, which specifies the vector type.
8060 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8061 std::optional<llvm::APSInt> NeonTypeConst =
8062 Arg->getIntegerConstantExpr(getContext());
8063 if (!NeonTypeConst)
8064 return nullptr;
8065
8066 // Determine the type of this overloaded NEON intrinsic.
8067 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8068 bool Usgn = Type.isUnsigned();
8069 bool Quad = Type.isQuad();
8070 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8071 const bool AllowBFloatArgsAndRet =
8072 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8073
8074 llvm::FixedVectorType *VTy =
8075 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8076 llvm::Type *Ty = VTy;
8077 if (!Ty)
8078 return nullptr;
8079
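// Materializes an address's alignment as an i32 constant, for intrinsics that
// take an explicit alignment operand.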
8080 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8081 return Builder.getInt32(addr.getAlignment().getQuantity());
8082 };
8083
8084 unsigned Int = LLVMIntrinsic;
8085 if ((Modifier & UnsignedAlts) && !Usgn)
8086 Int = AltLLVMIntrinsic;
8087
8088 switch (BuiltinID) {
8089 default: break;
8090 case NEON::BI__builtin_neon_splat_lane_v:
8091 case NEON::BI__builtin_neon_splat_laneq_v:
8092 case NEON::BI__builtin_neon_splatq_lane_v:
8093 case NEON::BI__builtin_neon_splatq_laneq_v: {
8094 auto NumElements = VTy->getElementCount();
8095 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8096 NumElements = NumElements * 2;
8097 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8098 NumElements = NumElements.divideCoefficientBy(2);
8099
8100 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8101 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8102 }
8103 case NEON::BI__builtin_neon_vpadd_v:
8104 case NEON::BI__builtin_neon_vpaddq_v:
8105 // We don't allow fp/int overloading of intrinsics.
8106 if (VTy->getElementType()->isFloatingPointTy() &&
8107 Int == Intrinsic::aarch64_neon_addp)
8108 Int = Intrinsic::aarch64_neon_faddp;
8109 break;
8110 case NEON::BI__builtin_neon_vabs_v:
8111 case NEON::BI__builtin_neon_vabsq_v:
8112 if (VTy->getElementType()->isFloatingPointTy())
8113 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8114 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8115 case NEON::BI__builtin_neon_vadd_v:
8116 case NEON::BI__builtin_neon_vaddq_v: {
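// Polynomial vector addition is carry-less: addition in GF(2) is a bitwise
// XOR, so reinterpret the operands as byte vectors and XOR them.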
8117 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8118 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8119 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8120 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8121 return Builder.CreateBitCast(Ops[0], Ty);
8122 }
8123 case NEON::BI__builtin_neon_vaddhn_v: {
8124 llvm::FixedVectorType *SrcTy =
8125 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8126
8127 // %sum = add <4 x i32> %lhs, %rhs
8128 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8129 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8130 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8131
8132 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8133 Constant *ShiftAmt =
8134 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8135 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8136
8137 // %res = trunc <4 x i32> %high to <4 x i16>
8138 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8139 }
8140 case NEON::BI__builtin_neon_vcale_v:
8141 case NEON::BI__builtin_neon_vcaleq_v:
8142 case NEON::BI__builtin_neon_vcalt_v:
8143 case NEON::BI__builtin_neon_vcaltq_v:
8144 std::swap(Ops[0], Ops[1]);
8145 [[fallthrough]];
8146 case NEON::BI__builtin_neon_vcage_v:
8147 case NEON::BI__builtin_neon_vcageq_v:
8148 case NEON::BI__builtin_neon_vcagt_v:
8149 case NEON::BI__builtin_neon_vcagtq_v: {
8150 llvm::Type *Ty;
8151 switch (VTy->getScalarSizeInBits()) {
8152 default: llvm_unreachable("unexpected type");
8153 case 32:
8154 Ty = FloatTy;
8155 break;
8156 case 64:
8157 Ty = DoubleTy;
8158 break;
8159 case 16:
8160 Ty = HalfTy;
8161 break;
8162 }
8163 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8164 llvm::Type *Tys[] = { VTy, VecFlt };
8165 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8166 return EmitNeonCall(F, Ops, NameHint);
8167 }
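// Compare-against-zero builtins: the helper picks the floating-point or the
// integer predicate depending on the element type of Ty.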
8168 case NEON::BI__builtin_neon_vceqz_v:
8169 case NEON::BI__builtin_neon_vceqzq_v:
8170 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8171 ICmpInst::ICMP_EQ, "vceqz");
8172 case NEON::BI__builtin_neon_vcgez_v:
8173 case NEON::BI__builtin_neon_vcgezq_v:
8174 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8175 ICmpInst::ICMP_SGE, "vcgez");
8176 case NEON::BI__builtin_neon_vclez_v:
8177 case NEON::BI__builtin_neon_vclezq_v:
8178 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8179 ICmpInst::ICMP_SLE, "vclez");
8180 case NEON::BI__builtin_neon_vcgtz_v:
8181 case NEON::BI__builtin_neon_vcgtzq_v:
8182 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8183 ICmpInst::ICMP_SGT, "vcgtz");
8184 case NEON::BI__builtin_neon_vcltz_v:
8185 case NEON::BI__builtin_neon_vcltzq_v:
8186 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8187 ICmpInst::ICMP_SLT, "vcltz");
8188 case NEON::BI__builtin_neon_vclz_v:
8189 case NEON::BI__builtin_neon_vclzq_v:
8190 // We generate a target-independent intrinsic, which needs a second argument
8191 // for whether or not clz of zero is undefined; on ARM it isn't.
8192 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8193 break;
8194 case NEON::BI__builtin_neon_vcvt_f32_v:
8195 case NEON::BI__builtin_neon_vcvtq_f32_v:
8196 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8197 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8198 HasLegalHalfType);
8199 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8200 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8201 case NEON::BI__builtin_neon_vcvt_f16_s16:
8202 case NEON::BI__builtin_neon_vcvt_f16_u16:
8203 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8204 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8205 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8206 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8207 HasLegalHalfType);
8208 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8209 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8210 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8211 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8212 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8213 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8214 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8215 Function *F = CGM.getIntrinsic(Int, Tys);
8216 return EmitNeonCall(F, Ops, "vcvt_n");
8217 }
8218 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8219 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8220 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8221 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8222 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8223 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8224 Function *F = CGM.getIntrinsic(Int, Tys);
8225 return EmitNeonCall(F, Ops, "vcvt_n");
8226 }
8227 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8228 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8229 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8230 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8231 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8232 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8233 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8234 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8235 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8236 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8237 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8238 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8239 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8240 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8241 return EmitNeonCall(F, Ops, "vcvt_n");
8242 }
8243 case NEON::BI__builtin_neon_vcvt_s32_v:
8244 case NEON::BI__builtin_neon_vcvt_u32_v:
8245 case NEON::BI__builtin_neon_vcvt_s64_v:
8246 case NEON::BI__builtin_neon_vcvt_u64_v:
8247 case NEON::BI__builtin_neon_vcvt_s16_f16:
8248 case NEON::BI__builtin_neon_vcvt_u16_f16:
8249 case NEON::BI__builtin_neon_vcvtq_s32_v:
8250 case NEON::BI__builtin_neon_vcvtq_u32_v:
8251 case NEON::BI__builtin_neon_vcvtq_s64_v:
8252 case NEON::BI__builtin_neon_vcvtq_u64_v:
8253 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8254 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8255 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8256 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8257 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8258 }
8259 case NEON::BI__builtin_neon_vcvta_s16_f16:
8260 case NEON::BI__builtin_neon_vcvta_s32_v:
8261 case NEON::BI__builtin_neon_vcvta_s64_v:
8262 case NEON::BI__builtin_neon_vcvta_u16_f16:
8263 case NEON::BI__builtin_neon_vcvta_u32_v:
8264 case NEON::BI__builtin_neon_vcvta_u64_v:
8265 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8266 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8267 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8268 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8269 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8270 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8271 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8272 case NEON::BI__builtin_neon_vcvtn_s32_v:
8273 case NEON::BI__builtin_neon_vcvtn_s64_v:
8274 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8275 case NEON::BI__builtin_neon_vcvtn_u32_v:
8276 case NEON::BI__builtin_neon_vcvtn_u64_v:
8277 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8278 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8279 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8280 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8281 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8282 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8283 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8284 case NEON::BI__builtin_neon_vcvtp_s32_v:
8285 case NEON::BI__builtin_neon_vcvtp_s64_v:
8286 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8287 case NEON::BI__builtin_neon_vcvtp_u32_v:
8288 case NEON::BI__builtin_neon_vcvtp_u64_v:
8289 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8290 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8291 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8292 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8293 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8294 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8295 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8296 case NEON::BI__builtin_neon_vcvtm_s32_v:
8297 case NEON::BI__builtin_neon_vcvtm_s64_v:
8298 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8299 case NEON::BI__builtin_neon_vcvtm_u32_v:
8300 case NEON::BI__builtin_neon_vcvtm_u64_v:
8301 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8302 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8303 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8304 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8305 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8306 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8307 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8308 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8309 }
8310 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8311 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8312 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8313
8314 }
8315 case NEON::BI__builtin_neon_vext_v:
8316 case NEON::BI__builtin_neon_vextq_v: {
8317 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8318 SmallVector<int, 16> Indices;
8319 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8320 Indices.push_back(i+CV);
8321
8322 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8323 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8324 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8325 }
8326 case NEON::BI__builtin_neon_vfma_v:
8327 case NEON::BI__builtin_neon_vfmaq_v: {
8328 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8329 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8330 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8331
8332 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8333 return emitCallMaybeConstrainedFPBuiltin(
8334 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8335 {Ops[1], Ops[2], Ops[0]});
8336 }
8337 case NEON::BI__builtin_neon_vld1_v:
8338 case NEON::BI__builtin_neon_vld1q_v: {
8339 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8340 Ops.push_back(getAlignmentValue32(PtrOp0));
8341 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8342 }
8343 case NEON::BI__builtin_neon_vld1_x2_v:
8344 case NEON::BI__builtin_neon_vld1q_x2_v:
8345 case NEON::BI__builtin_neon_vld1_x3_v:
8346 case NEON::BI__builtin_neon_vld1q_x3_v:
8347 case NEON::BI__builtin_neon_vld1_x4_v:
8348 case NEON::BI__builtin_neon_vld1q_x4_v: {
8349 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8350 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8351 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8352 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8353 }
8354 case NEON::BI__builtin_neon_vld2_v:
8355 case NEON::BI__builtin_neon_vld2q_v:
8356 case NEON::BI__builtin_neon_vld3_v:
8357 case NEON::BI__builtin_neon_vld3q_v:
8358 case NEON::BI__builtin_neon_vld4_v:
8359 case NEON::BI__builtin_neon_vld4q_v:
8360 case NEON::BI__builtin_neon_vld2_dup_v:
8361 case NEON::BI__builtin_neon_vld2q_dup_v:
8362 case NEON::BI__builtin_neon_vld3_dup_v:
8363 case NEON::BI__builtin_neon_vld3q_dup_v:
8364 case NEON::BI__builtin_neon_vld4_dup_v:
8365 case NEON::BI__builtin_neon_vld4q_dup_v: {
8366 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8367 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8368 Value *Align = getAlignmentValue32(PtrOp1);
8369 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8370 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8371 }
8372 case NEON::BI__builtin_neon_vld1_dup_v:
8373 case NEON::BI__builtin_neon_vld1q_dup_v: {
8374 Value *V = PoisonValue::get(Ty);
8375 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8376 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8377 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8378 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8379 return EmitNeonSplat(Ops[0], CI);
8380 }
8381 case NEON::BI__builtin_neon_vld2_lane_v:
8382 case NEON::BI__builtin_neon_vld2q_lane_v:
8383 case NEON::BI__builtin_neon_vld3_lane_v:
8384 case NEON::BI__builtin_neon_vld3q_lane_v:
8385 case NEON::BI__builtin_neon_vld4_lane_v:
8386 case NEON::BI__builtin_neon_vld4q_lane_v: {
8387 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8388 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8389 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8390 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8391 Ops.push_back(getAlignmentValue32(PtrOp1));
8392 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8393 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8394 }
8395 case NEON::BI__builtin_neon_vmovl_v: {
8396 llvm::FixedVectorType *DTy =
8397 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8398 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8399 if (Usgn)
8400 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8401 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8402 }
8403 case NEON::BI__builtin_neon_vmovn_v: {
8404 llvm::FixedVectorType *QTy =
8405 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8406 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8407 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8408 }
8409 case NEON::BI__builtin_neon_vmull_v:
8410 // FIXME: the integer vmull operations could be emitted in terms of pure
8411 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8412 // hoisting the exts outside loops. Until global ISel comes along that can
8413 // see through such movement this leads to bad CodeGen. So we need an
8414 // intrinsic for now.
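 // For reference, the pure-IR form described above would look roughly like
 //   %a = sext <4 x i16> %x to <4 x i32>
 //   %b = sext <4 x i16> %y to <4 x i32>
 //   %r = mul <4 x i32> %a, %b
 // (zext for the unsigned variants); it is those extends that LLVM hoists.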
8415 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8416 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8417 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8418 case NEON::BI__builtin_neon_vpadal_v:
8419 case NEON::BI__builtin_neon_vpadalq_v: {
8420 // The source operand type has twice as many elements of half the size.
8421 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8422 llvm::Type *EltTy =
8423 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8424 auto *NarrowTy =
8425 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8426 llvm::Type *Tys[2] = { Ty, NarrowTy };
8427 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8428 }
8429 case NEON::BI__builtin_neon_vpaddl_v:
8430 case NEON::BI__builtin_neon_vpaddlq_v: {
8431 // The source operand type has twice as many elements of half the size.
8432 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8433 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8434 auto *NarrowTy =
8435 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8436 llvm::Type *Tys[2] = { Ty, NarrowTy };
8437 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8438 }
8439 case NEON::BI__builtin_neon_vqdmlal_v:
8440 case NEON::BI__builtin_neon_vqdmlsl_v: {
8441 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8442 Ops[1] =
8443 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8444 Ops.resize(2);
8445 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8446 }
8447 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8448 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8449 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8450 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8451 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8452 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8453 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8454 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8455 RTy->getNumElements() * 2);
8456 llvm::Type *Tys[2] = {
8457 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8458 /*isQuad*/ false))};
8459 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8460 }
8461 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8462 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8463 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8464 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8465 llvm::Type *Tys[2] = {
8466 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8467 /*isQuad*/ true))};
8468 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8469 }
8470 case NEON::BI__builtin_neon_vqshl_n_v:
8471 case NEON::BI__builtin_neon_vqshlq_n_v:
8472 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8473 1, false);
8474 case NEON::BI__builtin_neon_vqshlu_n_v:
8475 case NEON::BI__builtin_neon_vqshluq_n_v:
8476 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8477 1, false);
8478 case NEON::BI__builtin_neon_vrecpe_v:
8479 case NEON::BI__builtin_neon_vrecpeq_v:
8480 case NEON::BI__builtin_neon_vrsqrte_v:
8481 case NEON::BI__builtin_neon_vrsqrteq_v:
8482 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8483 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8484 case NEON::BI__builtin_neon_vrndi_v:
8485 case NEON::BI__builtin_neon_vrndiq_v:
8486 Int = Builder.getIsFPConstrained()
8487 ? Intrinsic::experimental_constrained_nearbyint
8488 : Intrinsic::nearbyint;
8489 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8490 case NEON::BI__builtin_neon_vrshr_n_v:
8491 case NEON::BI__builtin_neon_vrshrq_n_v:
8492 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8493 1, true);
8494 case NEON::BI__builtin_neon_vsha512hq_u64:
8495 case NEON::BI__builtin_neon_vsha512h2q_u64:
8496 case NEON::BI__builtin_neon_vsha512su0q_u64:
8497 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8498 Function *F = CGM.getIntrinsic(Int);
8499 return EmitNeonCall(F, Ops, "");
8500 }
8501 case NEON::BI__builtin_neon_vshl_n_v:
8502 case NEON::BI__builtin_neon_vshlq_n_v:
8503 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8504 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8505 "vshl_n");
8506 case NEON::BI__builtin_neon_vshll_n_v: {
8507 llvm::FixedVectorType *SrcTy =
8508 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8509 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8510 if (Usgn)
8511 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8512 else
8513 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8514 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8515 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8516 }
8517 case NEON::BI__builtin_neon_vshrn_n_v: {
8518 llvm::FixedVectorType *SrcTy =
8519 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8520 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8521 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8522 if (Usgn)
8523 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8524 else
8525 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8526 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8527 }
8528 case NEON::BI__builtin_neon_vshr_n_v:
8529 case NEON::BI__builtin_neon_vshrq_n_v:
8530 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8531 case NEON::BI__builtin_neon_vst1_v:
8532 case NEON::BI__builtin_neon_vst1q_v:
8533 case NEON::BI__builtin_neon_vst2_v:
8534 case NEON::BI__builtin_neon_vst2q_v:
8535 case NEON::BI__builtin_neon_vst3_v:
8536 case NEON::BI__builtin_neon_vst3q_v:
8537 case NEON::BI__builtin_neon_vst4_v:
8538 case NEON::BI__builtin_neon_vst4q_v:
8539 case NEON::BI__builtin_neon_vst2_lane_v:
8540 case NEON::BI__builtin_neon_vst2q_lane_v:
8541 case NEON::BI__builtin_neon_vst3_lane_v:
8542 case NEON::BI__builtin_neon_vst3q_lane_v:
8543 case NEON::BI__builtin_neon_vst4_lane_v:
8544 case NEON::BI__builtin_neon_vst4q_lane_v: {
8545 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8546 Ops.push_back(getAlignmentValue32(PtrOp0));
8547 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8548 }
8549 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8550 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8551 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8552 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8553 case NEON::BI__builtin_neon_vsm4eq_u32: {
8554 Function *F = CGM.getIntrinsic(Int);
8555 return EmitNeonCall(F, Ops, "");
8556 }
8557 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8558 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8559 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8560 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8561 Function *F = CGM.getIntrinsic(Int);
8562 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8563 return EmitNeonCall(F, Ops, "");
8564 }
8565 case NEON::BI__builtin_neon_vst1_x2_v:
8566 case NEON::BI__builtin_neon_vst1q_x2_v:
8567 case NEON::BI__builtin_neon_vst1_x3_v:
8568 case NEON::BI__builtin_neon_vst1q_x3_v:
8569 case NEON::BI__builtin_neon_vst1_x4_v:
8570 case NEON::BI__builtin_neon_vst1q_x4_v: {
8571 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8572 // in AArch64 it comes last. We may want to stick to one or the other.
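 // As a concrete sketch of the difference: the AArch64 intrinsics
 // (llvm.aarch64.neon.st1x2 and friends) take the pointer as the last
 // operand, while the AArch32 ones (llvm.arm.neon.vst1x2 etc.) take it
 // first, which is why Ops is rotated below for AArch64 targets.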
8573 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8574 Arch == llvm::Triple::aarch64_32) {
8575 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8576 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8577 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8578 }
8579 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8580 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8581 }
8582 case NEON::BI__builtin_neon_vsubhn_v: {
8583 llvm::FixedVectorType *SrcTy =
8584 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8585
8586 // %sum = add <4 x i32> %lhs, %rhs
8587 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8588 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8589 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8590
8591 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8592 Constant *ShiftAmt =
8593 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8594 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8595
8596 // %res = trunc <4 x i32> %high to <4 x i16>
8597 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8598 }
8599 case NEON::BI__builtin_neon_vtrn_v:
8600 case NEON::BI__builtin_neon_vtrnq_v: {
8601 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8602 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8603 Value *SV = nullptr;
8604
8605 for (unsigned vi = 0; vi != 2; ++vi) {
8606 SmallVector<int, 16> Indices;
8607 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8608 Indices.push_back(i+vi);
8609 Indices.push_back(i+e+vi);
8610 }
8611 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8612 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8613 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8614 }
8615 return SV;
8616 }
8617 case NEON::BI__builtin_neon_vtst_v:
8618 case NEON::BI__builtin_neon_vtstq_v: {
8619 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8620 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8621 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8622 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8623 ConstantAggregateZero::get(Ty));
8624 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8625 }
8626 case NEON::BI__builtin_neon_vuzp_v:
8627 case NEON::BI__builtin_neon_vuzpq_v: {
8628 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8629 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8630 Value *SV = nullptr;
8631
8632 for (unsigned vi = 0; vi != 2; ++vi) {
8633 SmallVector<int, 16> Indices;
8634 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8635 Indices.push_back(2*i+vi);
8636
8637 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8638 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8639 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8640 }
8641 return SV;
8642 }
8643 case NEON::BI__builtin_neon_vxarq_u64: {
8644 Function *F = CGM.getIntrinsic(Int);
8645 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8646 return EmitNeonCall(F, Ops, "");
8647 }
8648 case NEON::BI__builtin_neon_vzip_v:
8649 case NEON::BI__builtin_neon_vzipq_v: {
8650 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8651 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8652 Value *SV = nullptr;
8653
8654 for (unsigned vi = 0; vi != 2; ++vi) {
8655 SmallVector<int, 16> Indices;
8656 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8657 Indices.push_back((i + vi*e) >> 1);
8658 Indices.push_back(((i + vi*e) >> 1)+e);
8659 }
8660 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8661 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8662 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8663 }
8664 return SV;
8665 }
8666 case NEON::BI__builtin_neon_vdot_s32:
8667 case NEON::BI__builtin_neon_vdot_u32:
8668 case NEON::BI__builtin_neon_vdotq_s32:
8669 case NEON::BI__builtin_neon_vdotq_u32: {
8670 auto *InputTy =
8671 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8672 llvm::Type *Tys[2] = { Ty, InputTy };
8673 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8674 }
8675 case NEON::BI__builtin_neon_vfmlal_low_f16:
8676 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8677 auto *InputTy =
8678 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8679 llvm::Type *Tys[2] = { Ty, InputTy };
8680 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8681 }
8682 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8683 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8684 auto *InputTy =
8685 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8686 llvm::Type *Tys[2] = { Ty, InputTy };
8687 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8688 }
8689 case NEON::BI__builtin_neon_vfmlal_high_f16:
8690 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8691 auto *InputTy =
8692 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8693 llvm::Type *Tys[2] = { Ty, InputTy };
8694 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8695 }
8696 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8697 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8698 auto *InputTy =
8699 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8700 llvm::Type *Tys[2] = { Ty, InputTy };
8701 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8702 }
8703 case NEON::BI__builtin_neon_vmmlaq_s32:
8704 case NEON::BI__builtin_neon_vmmlaq_u32: {
8705 auto *InputTy =
8706 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8707 llvm::Type *Tys[2] = { Ty, InputTy };
8708 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8709 }
8710 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8711 auto *InputTy =
8712 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8713 llvm::Type *Tys[2] = { Ty, InputTy };
8714 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8715 }
8716 case NEON::BI__builtin_neon_vusdot_s32:
8717 case NEON::BI__builtin_neon_vusdotq_s32: {
8718 auto *InputTy =
8719 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8720 llvm::Type *Tys[2] = { Ty, InputTy };
8721 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8722 }
8723 case NEON::BI__builtin_neon_vbfdot_f32:
8724 case NEON::BI__builtin_neon_vbfdotq_f32: {
8725 llvm::Type *InputTy =
8726 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8727 llvm::Type *Tys[2] = { Ty, InputTy };
8728 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8729 }
8730 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8731 llvm::Type *Tys[1] = { Ty };
8732 Function *F = CGM.getIntrinsic(Int, Tys);
8733 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8734 }
8735
8736 }
8737
8738 assert(Int && "Expected valid intrinsic number");
8739
8740 // Determine the type(s) of this overloaded AArch64 intrinsic.
8741 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8742
8743 Value *Result = EmitNeonCall(F, Ops, NameHint);
8744 llvm::Type *ResultType = ConvertType(E->getType());
8745 // Cast the one-element vector result of the AArch64 intrinsic to the
8746 // scalar type expected by the builtin.
8747 return Builder.CreateBitCast(Result, ResultType, NameHint);
8748}
8749
8750Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8751 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8752 const CmpInst::Predicate Ip, const Twine &Name) {
8753 llvm::Type *OTy = Op->getType();
8754
8755 // FIXME: this is utterly horrific. We should not be looking at previous
8756 // codegen context to find out what needs doing. Unfortunately TableGen
8757 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8758 // (etc).
8759 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8760 OTy = BI->getOperand(0)->getType();
8761
8762 Op = Builder.CreateBitCast(Op, OTy);
8763 if (OTy->getScalarType()->isFloatingPointTy()) {
8764 if (Fp == CmpInst::FCMP_OEQ)
8765 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8766 else
8767 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8768 } else {
8769 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8770 }
8771 return Builder.CreateSExt(Op, Ty, Name);
8772}
8773
8774static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8775 Value *ExtOp, Value *IndexOp,
8776 llvm::Type *ResTy, unsigned IntID,
8777 const char *Name) {
8778 SmallVector<Value *, 2> TblOps;
8779 if (ExtOp)
8780 TblOps.push_back(ExtOp);
8781
8782 // Build a vector of sequential indices, e.g. (0, 1, 2, ..., 15).
8783 SmallVector<int, 16> Indices;
8784 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8785 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8786 Indices.push_back(2*i);
8787 Indices.push_back(2*i+1);
8788 }
8789
8790 int PairPos = 0, End = Ops.size() - 1;
8791 while (PairPos < End) {
8792 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8793 Ops[PairPos+1], Indices,
8794 Name));
8795 PairPos += 2;
8796 }
8797
8798 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8799 // of the last 128-bit lookup table with zero.
8800 if (PairPos == End) {
8801 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8802 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8803 ZeroTbl, Indices, Name));
8804 }
8805
8806 Function *TblF;
8807 TblOps.push_back(IndexOp);
8808 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8809
8810 return CGF.EmitNeonCall(TblF, TblOps, Name);
8811}
8812
8813Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8814 unsigned Value;
8815 switch (BuiltinID) {
8816 default:
8817 return nullptr;
8818 case clang::ARM::BI__builtin_arm_nop:
8819 Value = 0;
8820 break;
8821 case clang::ARM::BI__builtin_arm_yield:
8822 case clang::ARM::BI__yield:
8823 Value = 1;
8824 break;
8825 case clang::ARM::BI__builtin_arm_wfe:
8826 case clang::ARM::BI__wfe:
8827 Value = 2;
8828 break;
8829 case clang::ARM::BI__builtin_arm_wfi:
8830 case clang::ARM::BI__wfi:
8831 Value = 3;
8832 break;
8833 case clang::ARM::BI__builtin_arm_sev:
8834 case clang::ARM::BI__sev:
8835 Value = 4;
8836 break;
8837 case clang::ARM::BI__builtin_arm_sevl:
8838 case clang::ARM::BI__sevl:
8839 Value = 5;
8840 break;
8841 }
8842
8843 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8844 llvm::ConstantInt::get(Int32Ty, Value));
8845}
8846
8847enum SpecialRegisterAccessKind {
8848 NormalRead,
8849 VolatileRead,
8850 Write,
8851};
8852
8853// Generates the IR for __builtin_read_exec_*.
8854// Lowers the builtin to amdgcn_ballot intrinsic.
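// Roughly, __builtin_amdgcn_read_exec() becomes
//   call i64 @llvm.amdgcn.ballot.i64(i1 true)
// and the _hi variant additionally shifts the result right by 32 and truncates.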
8855static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8856 llvm::Type *RegisterType,
8857 llvm::Type *ValueType, bool isExecHi) {
8858 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8859 CodeGen::CodeGenModule &CGM = CGF.CGM;
8860
8861 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8862 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8863
8864 if (isExecHi) {
8865 Value *Rt2 = Builder.CreateLShr(Call, 32);
8866 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8867 return Rt2;
8868 }
8869
8870 return Call;
8871}
8872
8873// Generates the IR for the read/write special register builtin,
8874// ValueType is the type of the value that is to be written or read,
8875// RegisterType is the type of the register being written to or read from.
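// For example (with a hypothetical register name), __builtin_arm_rsr("foo") is
// lowered to roughly
//   call i32 @llvm.read_volatile_register.i32(metadata !{!"foo"})
// while the wsr variants use llvm.write_register instead.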
8876static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8877 const CallExpr *E,
8878 llvm::Type *RegisterType,
8879 llvm::Type *ValueType,
8880 SpecialRegisterAccessKind AccessKind,
8881 StringRef SysReg = "") {
8882 // The read and write register intrinsics only support 32-, 64- and 128-bit operations.
8883 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8884 RegisterType->isIntegerTy(128)) &&
8885 "Unsupported size for register.");
8886
8887 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8888 CodeGen::CodeGenModule &CGM = CGF.CGM;
8889 LLVMContext &Context = CGM.getLLVMContext();
8890
8891 if (SysReg.empty()) {
8892 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8893 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8894 }
8895
8896 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8897 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8898 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8899
8900 llvm::Type *Types[] = { RegisterType };
8901
8902 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8903 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8904 && "Can't fit 64-bit value in 32-bit register");
8905
8906 if (AccessKind != Write) {
8907 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8908 llvm::Function *F = CGM.getIntrinsic(
8909 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8910 : llvm::Intrinsic::read_register,
8911 Types);
8912 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8913
8914 if (MixedTypes)
8915 // Read into 64 bit register and then truncate result to 32 bit.
8916 return Builder.CreateTrunc(Call, ValueType);
8917
8918 if (ValueType->isPointerTy())
8919 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8920 return Builder.CreateIntToPtr(Call, ValueType);
8921
8922 return Call;
8923 }
8924
8925 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8926 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8927 if (MixedTypes) {
8928 // Extend 32 bit write value to 64 bit to pass to write.
8929 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8930 return Builder.CreateCall(F, { Metadata, ArgValue });
8931 }
8932
8933 if (ValueType->isPointerTy()) {
8934 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8935 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8936 return Builder.CreateCall(F, { Metadata, ArgValue });
8937 }
8938
8939 return Builder.CreateCall(F, { Metadata, ArgValue });
8940}
8941
8942/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8943/// argument that specifies the vector type.
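/// For example, overloaded "_v" builtins such as vld1_v carry a trailing
/// constant encoding NeonTypeFlags, whereas the lane get/set and SHA1 builtins
/// listed below map to fixed types and take no such argument.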
8944static bool HasExtraNeonArgument(unsigned BuiltinID) {
8945 switch (BuiltinID) {
8946 default: break;
8947 case NEON::BI__builtin_neon_vget_lane_i8:
8948 case NEON::BI__builtin_neon_vget_lane_i16:
8949 case NEON::BI__builtin_neon_vget_lane_bf16:
8950 case NEON::BI__builtin_neon_vget_lane_i32:
8951 case NEON::BI__builtin_neon_vget_lane_i64:
8952 case NEON::BI__builtin_neon_vget_lane_f32:
8953 case NEON::BI__builtin_neon_vgetq_lane_i8:
8954 case NEON::BI__builtin_neon_vgetq_lane_i16:
8955 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8956 case NEON::BI__builtin_neon_vgetq_lane_i32:
8957 case NEON::BI__builtin_neon_vgetq_lane_i64:
8958 case NEON::BI__builtin_neon_vgetq_lane_f32:
8959 case NEON::BI__builtin_neon_vduph_lane_bf16:
8960 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8961 case NEON::BI__builtin_neon_vset_lane_i8:
8962 case NEON::BI__builtin_neon_vset_lane_i16:
8963 case NEON::BI__builtin_neon_vset_lane_bf16:
8964 case NEON::BI__builtin_neon_vset_lane_i32:
8965 case NEON::BI__builtin_neon_vset_lane_i64:
8966 case NEON::BI__builtin_neon_vset_lane_f32:
8967 case NEON::BI__builtin_neon_vsetq_lane_i8:
8968 case NEON::BI__builtin_neon_vsetq_lane_i16:
8969 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8970 case NEON::BI__builtin_neon_vsetq_lane_i32:
8971 case NEON::BI__builtin_neon_vsetq_lane_i64:
8972 case NEON::BI__builtin_neon_vsetq_lane_f32:
8973 case NEON::BI__builtin_neon_vsha1h_u32:
8974 case NEON::BI__builtin_neon_vsha1cq_u32:
8975 case NEON::BI__builtin_neon_vsha1pq_u32:
8976 case NEON::BI__builtin_neon_vsha1mq_u32:
8977 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8978 case clang::ARM::BI_MoveToCoprocessor:
8979 case clang::ARM::BI_MoveToCoprocessor2:
8980 return false;
8981 }
8982 return true;
8983}
8984
8985Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8986 const CallExpr *E,
8987 ReturnValueSlot ReturnValue,
8988 llvm::Triple::ArchType Arch) {
8989 if (auto Hint = GetValueForARMHint(BuiltinID))
8990 return Hint;
8991
8992 if (BuiltinID == clang::ARM::BI__emit) {
8993 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8994 llvm::FunctionType *FTy =
8995 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8996
8997 Expr::EvalResult Result;
8998 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8999 llvm_unreachable("Sema will ensure that the parameter is constant");
9000
9001 llvm::APSInt Value = Result.Val.getInt();
9002 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
9003
9004 llvm::InlineAsm *Emit =
9005 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
9006 /*hasSideEffects=*/true)
9007 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
9008 /*hasSideEffects=*/true);
9009
9010 return Builder.CreateCall(Emit);
9011 }
9012
9013 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
9014 Value *Option = EmitScalarExpr(E->getArg(0));
9015 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
9016 }
9017
9018 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
9019 Value *Address = EmitScalarExpr(E->getArg(0));
9020 Value *RW = EmitScalarExpr(E->getArg(1));
9021 Value *IsData = EmitScalarExpr(E->getArg(2));
9022
9023 // Locality is not supported on ARM target
9024 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
9025
9026 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9027 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9028 }
9029
9030 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
9031 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9032 return Builder.CreateCall(
9033 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9034 }
9035
9036 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
9037 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
9038 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9039 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
9040 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
9041 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
9042 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
9043 return Res;
9044 }
9045
9046
9047 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
9048 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9049 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
9050 }
9051 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9052 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9053 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
9054 "cls");
9055 }
9056
9057 if (BuiltinID == clang::ARM::BI__clear_cache) {
9058 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9059 const FunctionDecl *FD = E->getDirectCallee();
9060 Value *Ops[2];
9061 for (unsigned i = 0; i < 2; i++)
9062 Ops[i] = EmitScalarExpr(E->getArg(i));
9063 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9064 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9065 StringRef Name = FD->getName();
9066 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9067 }
9068
9069 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9070 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9071 Function *F;
9072
9073 switch (BuiltinID) {
9074 default: llvm_unreachable("unexpected builtin");
9075 case clang::ARM::BI__builtin_arm_mcrr:
9076 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9077 break;
9078 case clang::ARM::BI__builtin_arm_mcrr2:
9079 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9080 break;
9081 }
9082
9083 // MCRR{2} instruction has 5 operands but
9084 // the intrinsic has 4 because Rt and Rt2
9085 // are represented as a single unsigned 64
9086 // bit integer in the intrinsic definition
9087 // but internally it's represented as 2 32
9088 // bit integers.
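 // In other words, Rt is taken from bits [31:0] and Rt2 from bits [63:32]
 // of the 64-bit argument, as done below.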
9089
9090 Value *Coproc = EmitScalarExpr(E->getArg(0));
9091 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9092 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9093 Value *CRm = EmitScalarExpr(E->getArg(3));
9094
9095 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9096 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9097 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9098 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9099
9100 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9101 }
9102
9103 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9104 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9105 Function *F;
9106
9107 switch (BuiltinID) {
9108 default: llvm_unreachable("unexpected builtin");
9109 case clang::ARM::BI__builtin_arm_mrrc:
9110 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9111 break;
9112 case clang::ARM::BI__builtin_arm_mrrc2:
9113 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9114 break;
9115 }
9116
9117 Value *Coproc = EmitScalarExpr(E->getArg(0));
9118 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9119 Value *CRm = EmitScalarExpr(E->getArg(2));
9120 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9121
9122 // Returns an unsigned 64 bit integer, represented
9123 // as two 32 bit integers.
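 // The two halves are zero-extended and recombined below as
 // (high << 32) | low before being bitcast to the builtin's return type.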
9124
9125 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9126 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9127 Rt = Builder.CreateZExt(Rt, Int64Ty);
9128 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9129
9130 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9131 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9132 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9133
9134 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9135 }
9136
9137 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9138 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9139 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9140 getContext().getTypeSize(E->getType()) == 64) ||
9141 BuiltinID == clang::ARM::BI__ldrexd) {
9142 Function *F;
9143
9144 switch (BuiltinID) {
9145 default: llvm_unreachable("unexpected builtin");
9146 case clang::ARM::BI__builtin_arm_ldaex:
9147 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9148 break;
9149 case clang::ARM::BI__builtin_arm_ldrexd:
9150 case clang::ARM::BI__builtin_arm_ldrex:
9151 case clang::ARM::BI__ldrexd:
9152 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9153 break;
9154 }
9155
9156 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9157 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9158
9159 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9160 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9161 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9162 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9163
9164 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9165 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9166 Val = Builder.CreateOr(Val, Val1);
9167 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9168 }
9169
9170 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9171 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9172 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9173
9174 QualType Ty = E->getType();
9175 llvm::Type *RealResTy = ConvertType(Ty);
9176 llvm::Type *IntTy =
9177 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9178
9179 Function *F = CGM.getIntrinsic(
9180 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9181 : Intrinsic::arm_ldrex,
9182 UnqualPtrTy);
9183 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9184 Val->addParamAttr(
9185 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9186
9187 if (RealResTy->isPointerTy())
9188 return Builder.CreateIntToPtr(Val, RealResTy);
9189 else {
9190 llvm::Type *IntResTy = llvm::IntegerType::get(
9191 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9192 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9193 RealResTy);
9194 }
9195 }
9196
9197 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9198 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9199 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9200 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9201 Function *F = CGM.getIntrinsic(
9202 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9203 : Intrinsic::arm_strexd);
9204 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9205
9206 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9207 Value *Val = EmitScalarExpr(E->getArg(0));
9208 Builder.CreateStore(Val, Tmp);
9209
9210 Address LdPtr = Tmp.withElementType(STy);
9211 Val = Builder.CreateLoad(LdPtr);
9212
9213 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9214 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9215 Value *StPtr = EmitScalarExpr(E->getArg(1));
9216 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9217 }
9218
9219 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9220 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9221 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9222 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9223
9224 QualType Ty = E->getArg(0)->getType();
9225 llvm::Type *StoreTy =
9226 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9227
9228 if (StoreVal->getType()->isPointerTy())
9229 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9230 else {
9231 llvm::Type *IntTy = llvm::IntegerType::get(
9232 getLLVMContext(),
9233 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9234 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9235 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9236 }
9237
9238 Function *F = CGM.getIntrinsic(
9239 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9240 : Intrinsic::arm_strex,
9241 StoreAddr->getType());
9242
9243 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9244 CI->addParamAttr(
9245 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9246 return CI;
9247 }
9248
9249 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9250 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9251 return Builder.CreateCall(F);
9252 }
9253
9254 // CRC32
9255 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9256 switch (BuiltinID) {
9257 case clang::ARM::BI__builtin_arm_crc32b:
9258 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9259 case clang::ARM::BI__builtin_arm_crc32cb:
9260 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9261 case clang::ARM::BI__builtin_arm_crc32h:
9262 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9263 case clang::ARM::BI__builtin_arm_crc32ch:
9264 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9265 case clang::ARM::BI__builtin_arm_crc32w:
9266 case clang::ARM::BI__builtin_arm_crc32d:
9267 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9268 case clang::ARM::BI__builtin_arm_crc32cw:
9269 case clang::ARM::BI__builtin_arm_crc32cd:
9270 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9271 }
9272
9273 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9274 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9275 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9276
9277 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9278 // intrinsics, hence we need different codegen for these cases.
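 // That is, crc32d(crc, x) is emitted roughly as
 //   crc32w(crc32w(crc, lo32(x)), hi32(x))
 // where lo32/hi32 stand for the truncation and 32-bit right shift below.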
9279 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9280 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9281 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9282 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9283 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9284 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9285
9286 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9287 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9288 return Builder.CreateCall(F, {Res, Arg1b});
9289 } else {
9290 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9291
9292 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9293 return Builder.CreateCall(F, {Arg0, Arg1});
9294 }
9295 }
9296
9297 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9298 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9299 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9300 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9301 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9302 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9303
9304 SpecialRegisterAccessKind AccessKind = Write;
9305 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9306 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9307 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9308 AccessKind = VolatileRead;
9309
9310 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9311 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9312
9313 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9314 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9315
9316 llvm::Type *ValueType;
9317 llvm::Type *RegisterType;
9318 if (IsPointerBuiltin) {
9319 ValueType = VoidPtrTy;
9320 RegisterType = Int32Ty;
9321 } else if (Is64Bit) {
9322 ValueType = RegisterType = Int64Ty;
9323 } else {
9324 ValueType = RegisterType = Int32Ty;
9325 }
9326
9327 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9328 AccessKind);
9329 }
9330
9331 if (BuiltinID == ARM::BI__builtin_sponentry) {
9332 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9333 return Builder.CreateCall(F);
9334 }
9335
9336 // Handle MSVC intrinsics before argument evaluation to prevent double
9337 // evaluation.
9338 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9339 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9340
9341 // Deal with MVE builtins
9342 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9343 return Result;
9344 // Handle CDE builtins
9345 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9346 return Result;
9347
9348 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
9349 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9350 return P.first == BuiltinID;
9351 });
9352 if (It != end(NEONEquivalentIntrinsicMap))
9353 BuiltinID = It->second;
9354
9355 // Find out if any arguments are required to be integer constant
9356 // expressions.
9357 unsigned ICEArguments = 0;
9358 ASTContext::GetBuiltinTypeError Error;
9359 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9360 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9361
9362 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9363 return Builder.getInt32(addr.getAlignment().getQuantity());
9364 };
9365
9366 Address PtrOp0 = Address::invalid();
9367 Address PtrOp1 = Address::invalid();
9368 SmallVector<Value*, 4> Ops;
9369 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9370 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9371 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9372 if (i == 0) {
9373 switch (BuiltinID) {
9374 case NEON::BI__builtin_neon_vld1_v:
9375 case NEON::BI__builtin_neon_vld1q_v:
9376 case NEON::BI__builtin_neon_vld1q_lane_v:
9377 case NEON::BI__builtin_neon_vld1_lane_v:
9378 case NEON::BI__builtin_neon_vld1_dup_v:
9379 case NEON::BI__builtin_neon_vld1q_dup_v:
9380 case NEON::BI__builtin_neon_vst1_v:
9381 case NEON::BI__builtin_neon_vst1q_v:
9382 case NEON::BI__builtin_neon_vst1q_lane_v:
9383 case NEON::BI__builtin_neon_vst1_lane_v:
9384 case NEON::BI__builtin_neon_vst2_v:
9385 case NEON::BI__builtin_neon_vst2q_v:
9386 case NEON::BI__builtin_neon_vst2_lane_v:
9387 case NEON::BI__builtin_neon_vst2q_lane_v:
9388 case NEON::BI__builtin_neon_vst3_v:
9389 case NEON::BI__builtin_neon_vst3q_v:
9390 case NEON::BI__builtin_neon_vst3_lane_v:
9391 case NEON::BI__builtin_neon_vst3q_lane_v:
9392 case NEON::BI__builtin_neon_vst4_v:
9393 case NEON::BI__builtin_neon_vst4q_v:
9394 case NEON::BI__builtin_neon_vst4_lane_v:
9395 case NEON::BI__builtin_neon_vst4q_lane_v:
9396 // Get the alignment for the argument in addition to the value;
9397 // we'll use it later.
9398 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9399 Ops.push_back(PtrOp0.emitRawPointer(*this));
9400 continue;
9401 }
9402 }
9403 if (i == 1) {
9404 switch (BuiltinID) {
9405 case NEON::BI__builtin_neon_vld2_v:
9406 case NEON::BI__builtin_neon_vld2q_v:
9407 case NEON::BI__builtin_neon_vld3_v:
9408 case NEON::BI__builtin_neon_vld3q_v:
9409 case NEON::BI__builtin_neon_vld4_v:
9410 case NEON::BI__builtin_neon_vld4q_v:
9411 case NEON::BI__builtin_neon_vld2_lane_v:
9412 case NEON::BI__builtin_neon_vld2q_lane_v:
9413 case NEON::BI__builtin_neon_vld3_lane_v:
9414 case NEON::BI__builtin_neon_vld3q_lane_v:
9415 case NEON::BI__builtin_neon_vld4_lane_v:
9416 case NEON::BI__builtin_neon_vld4q_lane_v:
9417 case NEON::BI__builtin_neon_vld2_dup_v:
9418 case NEON::BI__builtin_neon_vld2q_dup_v:
9419 case NEON::BI__builtin_neon_vld3_dup_v:
9420 case NEON::BI__builtin_neon_vld3q_dup_v:
9421 case NEON::BI__builtin_neon_vld4_dup_v:
9422 case NEON::BI__builtin_neon_vld4q_dup_v:
9423 // Get the alignment for the argument in addition to the value;
9424 // we'll use it later.
9425 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9426 Ops.push_back(PtrOp1.emitRawPointer(*this));
9427 continue;
9428 }
9429 }
9430
9431 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9432 }
9433
9434 switch (BuiltinID) {
9435 default: break;
9436
9437 case NEON::BI__builtin_neon_vget_lane_i8:
9438 case NEON::BI__builtin_neon_vget_lane_i16:
9439 case NEON::BI__builtin_neon_vget_lane_i32:
9440 case NEON::BI__builtin_neon_vget_lane_i64:
9441 case NEON::BI__builtin_neon_vget_lane_bf16:
9442 case NEON::BI__builtin_neon_vget_lane_f32:
9443 case NEON::BI__builtin_neon_vgetq_lane_i8:
9444 case NEON::BI__builtin_neon_vgetq_lane_i16:
9445 case NEON::BI__builtin_neon_vgetq_lane_i32:
9446 case NEON::BI__builtin_neon_vgetq_lane_i64:
9447 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9448 case NEON::BI__builtin_neon_vgetq_lane_f32:
9449 case NEON::BI__builtin_neon_vduph_lane_bf16:
9450 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9451 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9452
9453 case NEON::BI__builtin_neon_vrndns_f32: {
9454 Value *Arg = EmitScalarExpr(E->getArg(0));
9455 llvm::Type *Tys[] = {Arg->getType()};
9456 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9457 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9458
9459 case NEON::BI__builtin_neon_vset_lane_i8:
9460 case NEON::BI__builtin_neon_vset_lane_i16:
9461 case NEON::BI__builtin_neon_vset_lane_i32:
9462 case NEON::BI__builtin_neon_vset_lane_i64:
9463 case NEON::BI__builtin_neon_vset_lane_bf16:
9464 case NEON::BI__builtin_neon_vset_lane_f32:
9465 case NEON::BI__builtin_neon_vsetq_lane_i8:
9466 case NEON::BI__builtin_neon_vsetq_lane_i16:
9467 case NEON::BI__builtin_neon_vsetq_lane_i32:
9468 case NEON::BI__builtin_neon_vsetq_lane_i64:
9469 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9470 case NEON::BI__builtin_neon_vsetq_lane_f32:
9471 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9472
9473 case NEON::BI__builtin_neon_vsha1h_u32:
9474 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9475 "vsha1h");
9476 case NEON::BI__builtin_neon_vsha1cq_u32:
9477 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9478 "vsha1h");
9479 case NEON::BI__builtin_neon_vsha1pq_u32:
9480 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9481 "vsha1h");
9482 case NEON::BI__builtin_neon_vsha1mq_u32:
9483 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9484 "vsha1h");
9485
9486 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9487 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9488 "vcvtbfp2bf");
9489 }
9490
9491 // The ARM _MoveToCoprocessor builtins put the input register value as
9492 // the first argument, but the LLVM intrinsic expects it as the third one.
9493 case clang::ARM::BI_MoveToCoprocessor:
9494 case clang::ARM::BI_MoveToCoprocessor2: {
9495 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9496 ? Intrinsic::arm_mcr
9497 : Intrinsic::arm_mcr2);
9498 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9499 Ops[3], Ops[4], Ops[5]});
9500 }
9501 }
9502
9503 // Get the last argument, which specifies the vector type.
9504 assert(HasExtraArg);
9505 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9506 std::optional<llvm::APSInt> Result =
9507 Arg->getIntegerConstantExpr(getContext());
9508 if (!Result)
9509 return nullptr;
9510
9511 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9512 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9513 // Determine the overloaded type of this builtin.
9514 llvm::Type *Ty;
9515 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9516 Ty = FloatTy;
9517 else
9518 Ty = DoubleTy;
9519
9520 // Determine whether this is an unsigned conversion or not.
9521 bool usgn = Result->getZExtValue() == 1;
9522 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9523
9524 // Call the appropriate intrinsic.
9525 Function *F = CGM.getIntrinsic(Int, Ty);
9526 return Builder.CreateCall(F, Ops, "vcvtr");
9527 }
9528
9529 // Determine the type of this overloaded NEON intrinsic.
9530 NeonTypeFlags Type = Result->getZExtValue();
9531 bool usgn = Type.isUnsigned();
9532 bool rightShift = false;
9533
9534 llvm::FixedVectorType *VTy =
9535 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9536 getTarget().hasBFloat16Type());
9537 llvm::Type *Ty = VTy;
9538 if (!Ty)
9539 return nullptr;
9540
9541 // Many NEON builtins have identical semantics and uses in ARM and
9542 // AArch64. Emit these in a single function.
9543 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9544 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9545 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9546 if (Builtin)
9547 return EmitCommonNeonBuiltinExpr(
9548 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9549 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9550
9551 unsigned Int;
9552 switch (BuiltinID) {
9553 default: return nullptr;
9554 case NEON::BI__builtin_neon_vld1q_lane_v:
9555 // Handle 64-bit integer elements as a special case. Use shuffles of
9556 // one-element vectors to avoid poor code for i64 in the backend.
9557 if (VTy->getElementType()->isIntegerTy(64)) {
9558 // Extract the other lane.
9559 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9560 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9561 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9562 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9563 // Load the value as a one-element vector.
9564 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9565 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9566 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9567 Value *Align = getAlignmentValue32(PtrOp0);
9568 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9569 // Combine them.
9570 int Indices[] = {1 - Lane, Lane};
9571 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9572 }
9573 [[fallthrough]];
9574 case NEON::BI__builtin_neon_vld1_lane_v: {
9575 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9576 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9577 Value *Ld = Builder.CreateLoad(PtrOp0);
9578 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9579 }
9580 case NEON::BI__builtin_neon_vqrshrn_n_v:
9581 Int =
9582 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9583 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9584 1, true);
9585 case NEON::BI__builtin_neon_vqrshrun_n_v:
9586 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9587 Ops, "vqrshrun_n", 1, true);
9588 case NEON::BI__builtin_neon_vqshrn_n_v:
9589 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9590 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9591 1, true);
9592 case NEON::BI__builtin_neon_vqshrun_n_v:
9593 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9594 Ops, "vqshrun_n", 1, true);
9595 case NEON::BI__builtin_neon_vrecpe_v:
9596 case NEON::BI__builtin_neon_vrecpeq_v:
9597 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9598 Ops, "vrecpe");
9599 case NEON::BI__builtin_neon_vrshrn_n_v:
9600 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9601 Ops, "vrshrn_n", 1, true);
9602 case NEON::BI__builtin_neon_vrsra_n_v:
9603 case NEON::BI__builtin_neon_vrsraq_n_v:
9604 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9605 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9606 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9607 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9608 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9609 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9610 case NEON::BI__builtin_neon_vsri_n_v:
9611 case NEON::BI__builtin_neon_vsriq_n_v:
9612 rightShift = true;
9613 [[fallthrough]];
9614 case NEON::BI__builtin_neon_vsli_n_v:
9615 case NEON::BI__builtin_neon_vsliq_n_v:
9616 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9617 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9618 Ops, "vsli_n");
9619 case NEON::BI__builtin_neon_vsra_n_v:
9620 case NEON::BI__builtin_neon_vsraq_n_v:
9621 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9622 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9623 return Builder.CreateAdd(Ops[0], Ops[1]);
9624 case NEON::BI__builtin_neon_vst1q_lane_v:
9625 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9626 // a one-element vector and avoid poor code for i64 in the backend.
9627 if (VTy->getElementType()->isIntegerTy(64)) {
9628 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9629 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9630 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9631 Ops[2] = getAlignmentValue32(PtrOp0);
9632 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9633 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9634 Tys), Ops);
9635 }
9636 [[fallthrough]];
9637 case NEON::BI__builtin_neon_vst1_lane_v: {
9638 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9639 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9640 return Builder.CreateStore(Ops[1],
9641 PtrOp0.withElementType(Ops[1]->getType()));
9642 }
9643 case NEON::BI__builtin_neon_vtbl1_v:
9644 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9645 Ops, "vtbl1");
9646 case NEON::BI__builtin_neon_vtbl2_v:
9647 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9648 Ops, "vtbl2");
9649 case NEON::BI__builtin_neon_vtbl3_v:
9650 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9651 Ops, "vtbl3");
9652 case NEON::BI__builtin_neon_vtbl4_v:
9653 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9654 Ops, "vtbl4");
9655 case NEON::BI__builtin_neon_vtbx1_v:
9656 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9657 Ops, "vtbx1");
9658 case NEON::BI__builtin_neon_vtbx2_v:
9659 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9660 Ops, "vtbx2");
9661 case NEON::BI__builtin_neon_vtbx3_v:
9662 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9663 Ops, "vtbx3");
9664 case NEON::BI__builtin_neon_vtbx4_v:
9665 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9666 Ops, "vtbx4");
9667 }
9668}
9669
9670template<typename Integer>
9671static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9672 return E->getIntegerConstantExpr(Context)->getExtValue();
9673}
9674
9675static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9676 llvm::Type *T, bool Unsigned) {
9677 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9678 // which finds it convenient to specify signed/unsigned as a boolean flag.
9679 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9680}
9681
9682static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9683 uint32_t Shift, bool Unsigned) {
9684 // MVE helper function for integer shift right. This must handle signed vs
9685 // unsigned, and also deal specially with the case where the shift count is
9686 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9687 // undefined behavior, but in MVE it's legal, so we must convert it to code
9688 // that is not undefined in IR.
9689 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9690 ->getElementType()
9691 ->getPrimitiveSizeInBits();
9692 if (Shift == LaneBits) {
9693 // An unsigned shift of the full lane size always generates zero, so we can
9694 // simply emit a zero vector. A signed shift of the full lane size does the
9695 // same thing as shifting by one bit fewer.
9696 if (Unsigned)
9697 return llvm::Constant::getNullValue(V->getType());
9698 else
9699 --Shift;
9700 }
9701 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9702}
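// For example (illustrative, assuming a <8 x i16> operand so LaneBits == 16):
//
//   MVEImmediateShr(Builder, V, /*Shift=*/16, /*Unsigned=*/true);  // zero vector
//   MVEImmediateShr(Builder, V, /*Shift=*/16, /*Unsigned=*/false); // ashr by 15
//   MVEImmediateShr(Builder, V, /*Shift=*/3,  /*Unsigned=*/true);  // lshr by 3
//
// so the emitted IR never shifts by an amount equal to the lane width.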
9703
9704static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9705 // MVE-specific helper function for a vector splat, which infers the element
9706 // count of the output vector by knowing that MVE vectors are all 128 bits
9707 // wide.
9708 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9709 return Builder.CreateVectorSplat(Elements, V);
9710}
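// For example (illustrative): splatting an i32 scalar yields a 4-element
// vector (128 / 32) and an i16 scalar an 8-element vector (128 / 16):
//
//   Value *V4 = ARMMVEVectorSplat(Builder, SomeI32); // <4 x i32> splat
//   Value *V8 = ARMMVEVectorSplat(Builder, SomeI16); // <8 x i16> splat
//
// where SomeI32/SomeI16 stand for arbitrary scalar Values of those widths.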
9711
9712static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9713 CodeGenFunction *CGF,
9714 llvm::Value *V,
9715 llvm::Type *DestType) {
9716 // Convert one MVE vector type into another by reinterpreting its in-register
9717 // format.
9718 //
9719 // In little-endian mode, this is identical to a bitcast (which reinterprets the
9720 // memory format). In big-endian mode, they're not necessarily the same, because
9721 // the register and memory formats map to each other differently depending on
9722 // the lane size.
9723 //
9724 // We generate a bitcast whenever we can (if we're little-endian, or if the
9725 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9726 // that performs the different kind of reinterpretation.
9727 if (CGF->getTarget().isBigEndian() &&
9728 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9729 return Builder.CreateCall(
9730 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9731 {DestType, V->getType()}),
9732 V);
9733 } else {
9734 return Builder.CreateBitCast(V, DestType);
9735 }
9736}
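// For example (illustrative, V8i16 and V4I32Ty being placeholder names for a
// <8 x i16> value and the <4 x i32> type): reinterpreting <8 x i16> as
// <4 x i32> emits a plain bitcast on little-endian targets, but a call to
// llvm.arm.mve.vreinterpretq on big-endian targets because the lane sizes
// differ (16 vs 32 bits). Reinterpreting between types with equal lane sizes,
// such as <4 x i32> and <4 x float>, is always a bitcast:
//
//   Value *R = ARMMVEVectorReinterpret(Builder, CGF, V8i16, V4I32Ty);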
9737
9738static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9739 // Make a shufflevector that extracts every other element of a vector (evens
9740 // or odds, as desired).
9741 SmallVector<int, 16> Indices;
9742 unsigned InputElements =
9743 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9744 for (unsigned i = 0; i < InputElements; i += 2)
9745 Indices.push_back(i + Odd);
9746 return Builder.CreateShuffleVector(V, Indices);
9747}
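// For example (illustrative): with a <8 x i16> input the shuffle masks are
// {0, 2, 4, 6} for Odd == false and {1, 3, 5, 7} for Odd == true, producing
// the even- or odd-numbered lanes as a <4 x i16> result:
//
//   Value *Evens = VectorUnzip(Builder, V, /*Odd=*/false);
//   Value *Odds  = VectorUnzip(Builder, V, /*Odd=*/true);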
9748
9749static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9750 llvm::Value *V1) {
9751 // Make a shufflevector that interleaves two vectors element by element.
9752 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9753 SmallVector<int, 16> Indices;
9754 unsigned InputElements =
9755 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9756 for (unsigned i = 0; i < InputElements; i++) {
9757 Indices.push_back(i);
9758 Indices.push_back(i + InputElements);
9759 }
9760 return Builder.CreateShuffleVector(V0, V1, Indices);
9761}
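// For example (illustrative): zipping two <4 x i32> values {a0,a1,a2,a3} and
// {b0,b1,b2,b3} uses the mask {0, 4, 1, 5, 2, 6, 3, 7} and yields the
// <8 x i32> interleaving {a0, b0, a1, b1, a2, b2, a3, b3}:
//
//   Value *Interleaved = VectorZip(Builder, V0, V1);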
9762
9763template<unsigned HighBit, unsigned OtherBits>
9764static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9765 // MVE-specific helper function to make a vector splat of a constant such as
9766 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9767 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9768 unsigned LaneBits = T->getPrimitiveSizeInBits();
9769 uint32_t Value = HighBit << (LaneBits - 1);
9770 if (OtherBits)
9771 Value |= (1UL << (LaneBits - 1)) - 1;
9772 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9773 return ARMMVEVectorSplat(Builder, Lane);
9774}
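// For example (illustrative), with 16-bit lanes (LaneBits == 16):
//
//   ARMMVEConstantSplat<1, 0>(Builder, VT); // splat of 0x8000 (INT16_MIN)
//   ARMMVEConstantSplat<0, 1>(Builder, VT); // splat of 0x7fff (INT16_MAX)
//   ARMMVEConstantSplat<1, 1>(Builder, VT); // splat of 0xffff (UINT16_MAX)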
9775
9776static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9777 llvm::Value *V,
9778 unsigned ReverseWidth) {
9779 // MVE-specific helper function which reverses the elements of a
9780 // vector within every (ReverseWidth)-bit collection of lanes.
9781 SmallVector<int, 16> Indices;
9782 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9783 unsigned Elements = 128 / LaneSize;
9784 unsigned Mask = ReverseWidth / LaneSize - 1;
9785 for (unsigned i = 0; i < Elements; i++)
9786 Indices.push_back(i ^ Mask);
9787 return Builder.CreateShuffleVector(V, Indices);
9788}
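// For example (illustrative, V16i8 being a placeholder <16 x i8> value):
// reversing 8-bit lanes within every 32-bit group (LaneSize == 8,
// ReverseWidth == 32) gives Mask == 3 and the shuffle indices
// {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, i.e. a vrev32.8-style
// reordering of the 128-bit vector:
//
//   Value *Rev = ARMMVEVectorElementReverse(Builder, V16i8, /*ReverseWidth=*/32);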
9789
9790Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9791 const CallExpr *E,
9792 ReturnValueSlot ReturnValue,
9793 llvm::Triple::ArchType Arch) {
9794 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9795 Intrinsic::ID IRIntr;
9796 unsigned NumVectors;
9797
9798 // Code autogenerated by Tablegen will handle all the simple builtins.
9799 switch (BuiltinID) {
9800 #include "clang/Basic/arm_mve_builtin_cg.inc"
9801
9802 // If we didn't match an MVE builtin id at all, go back to the
9803 // main EmitARMBuiltinExpr.
9804 default:
9805 return nullptr;
9806 }
9807
9808 // Anything that breaks from that switch is an MVE builtin that
9809 // needs handwritten code to generate.
9810
9811 switch (CustomCodeGenType) {
9812
9813 case CustomCodeGen::VLD24: {
9814 llvm::SmallVector<Value *, 4> Ops;
9815 llvm::SmallVector<llvm::Type *, 4> Tys;
9816
9817 auto MvecCType = E->getType();
9818 auto MvecLType = ConvertType(MvecCType);
9819 assert(MvecLType->isStructTy() &&
9820 "Return type for vld[24]q should be a struct");
9821 assert(MvecLType->getStructNumElements() == 1 &&
9822 "Return-type struct for vld[24]q should have one element");
9823 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9824 assert(MvecLTypeInner->isArrayTy() &&
9825 "Return-type struct for vld[24]q should contain an array");
9826 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9827 "Array member of return-type struct vld[24]q has wrong length");
9828 auto VecLType = MvecLTypeInner->getArrayElementType();
9829
9830 Tys.push_back(VecLType);
9831
9832 auto Addr = E->getArg(0);
9833 Ops.push_back(EmitScalarExpr(Addr));
9834 Tys.push_back(ConvertType(Addr->getType()));
9835
9836 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9837 Value *LoadResult = Builder.CreateCall(F, Ops);
9838 Value *MvecOut = PoisonValue::get(MvecLType);
9839 for (unsigned i = 0; i < NumVectors; ++i) {
9840 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9841 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9842 }
9843
9844 if (ReturnValue.isNull())
9845 return MvecOut;
9846 else
9847 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9848 }
9849
9850 case CustomCodeGen::VST24: {
9851 llvm::SmallVector<Value *, 4> Ops;
9852 llvm::SmallVector<llvm::Type *, 4> Tys;
9853
9854 auto Addr = E->getArg(0);
9855 Ops.push_back(EmitScalarExpr(Addr));
9856 Tys.push_back(ConvertType(Addr->getType()));
9857
9858 auto MvecCType = E->getArg(1)->getType();
9859 auto MvecLType = ConvertType(MvecCType);
9860 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9861 assert(MvecLType->getStructNumElements() == 1 &&
9862 "Data-type struct for vst2q should have one element");
9863 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9864 assert(MvecLTypeInner->isArrayTy() &&
9865 "Data-type struct for vst2q should contain an array");
9866 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9867 "Array member of data-type struct for vst2q has wrong length");
9868 auto VecLType = MvecLTypeInner->getArrayElementType();
9869
9870 Tys.push_back(VecLType);
9871
9872 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9873 EmitAggExpr(E->getArg(1), MvecSlot);
9874 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9875 for (unsigned i = 0; i < NumVectors; i++)
9876 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9877
9878 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9879 Value *ToReturn = nullptr;
9880 for (unsigned i = 0; i < NumVectors; i++) {
9881 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9882 ToReturn = Builder.CreateCall(F, Ops);
9883 Ops.pop_back();
9884 }
9885 return ToReturn;
9886 }
9887 }
9888 llvm_unreachable("unknown custom codegen type.");
9889}
9890
9891Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9892 const CallExpr *E,
9893 ReturnValueSlot ReturnValue,
9894 llvm::Triple::ArchType Arch) {
9895 switch (BuiltinID) {
9896 default:
9897 return nullptr;
9898#include "clang/Basic/arm_cde_builtin_cg.inc"
9899 }
9900}
9901
9902static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9903 const CallExpr *E,
9904 SmallVectorImpl<Value *> &Ops,
9905 llvm::Triple::ArchType Arch) {
9906 unsigned int Int = 0;
9907 const char *s = nullptr;
9908
9909 switch (BuiltinID) {
9910 default:
9911 return nullptr;
9912 case NEON::BI__builtin_neon_vtbl1_v:
9913 case NEON::BI__builtin_neon_vqtbl1_v:
9914 case NEON::BI__builtin_neon_vqtbl1q_v:
9915 case NEON::BI__builtin_neon_vtbl2_v:
9916 case NEON::BI__builtin_neon_vqtbl2_v:
9917 case NEON::BI__builtin_neon_vqtbl2q_v:
9918 case NEON::BI__builtin_neon_vtbl3_v:
9919 case NEON::BI__builtin_neon_vqtbl3_v:
9920 case NEON::BI__builtin_neon_vqtbl3q_v:
9921 case NEON::BI__builtin_neon_vtbl4_v:
9922 case NEON::BI__builtin_neon_vqtbl4_v:
9923 case NEON::BI__builtin_neon_vqtbl4q_v:
9924 break;
9925 case NEON::BI__builtin_neon_vtbx1_v:
9926 case NEON::BI__builtin_neon_vqtbx1_v:
9927 case NEON::BI__builtin_neon_vqtbx1q_v:
9928 case NEON::BI__builtin_neon_vtbx2_v:
9929 case NEON::BI__builtin_neon_vqtbx2_v:
9930 case NEON::BI__builtin_neon_vqtbx2q_v:
9931 case NEON::BI__builtin_neon_vtbx3_v:
9932 case NEON::BI__builtin_neon_vqtbx3_v:
9933 case NEON::BI__builtin_neon_vqtbx3q_v:
9934 case NEON::BI__builtin_neon_vtbx4_v:
9935 case NEON::BI__builtin_neon_vqtbx4_v:
9936 case NEON::BI__builtin_neon_vqtbx4q_v:
9937 break;
9938 }
9939
9940 assert(E->getNumArgs() >= 3);
9941
9942 // Get the last argument, which specifies the vector type.
9943 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9944 std::optional<llvm::APSInt> Result =
9945 Arg->getIntegerConstantExpr(CGF.getContext());
9946 if (!Result)
9947 return nullptr;
9948
9949 // Determine the type of this overloaded NEON intrinsic.
9950 NeonTypeFlags Type = Result->getZExtValue();
9951 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9952 if (!Ty)
9953 return nullptr;
9954
9955 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9956
9957 // AArch64 scalar builtins are not overloaded: they do not have an extra
9958 // argument that specifies the vector type, so we need to handle each case.
9959 switch (BuiltinID) {
9960 case NEON::BI__builtin_neon_vtbl1_v: {
9961 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9962 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9963 }
9964 case NEON::BI__builtin_neon_vtbl2_v: {
9965 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9966 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9967 }
9968 case NEON::BI__builtin_neon_vtbl3_v: {
9969 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9970 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9971 }
9972 case NEON::BI__builtin_neon_vtbl4_v: {
9973 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9974 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9975 }
9976 case NEON::BI__builtin_neon_vtbx1_v: {
9977 Value *TblRes =
9978 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9979 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9980
9981 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9982 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9983 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9984
9985 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9986 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9987 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9988 }
9989 case NEON::BI__builtin_neon_vtbx2_v: {
9990 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9991 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9992 }
9993 case NEON::BI__builtin_neon_vtbx3_v: {
9994 Value *TblRes =
9995 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9996 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9997
9998 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9999 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
10000 TwentyFourV);
10001 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10002
10003 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10004 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10005 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10006 }
10007 case NEON::BI__builtin_neon_vtbx4_v: {
10008 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
10009 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
10010 }
10011 case NEON::BI__builtin_neon_vqtbl1_v:
10012 case NEON::BI__builtin_neon_vqtbl1q_v:
10013 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
10014 case NEON::BI__builtin_neon_vqtbl2_v:
10015 case NEON::BI__builtin_neon_vqtbl2q_v: {
10016 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
10017 case NEON::BI__builtin_neon_vqtbl3_v:
10018 case NEON::BI__builtin_neon_vqtbl3q_v:
10019 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
10020 case NEON::BI__builtin_neon_vqtbl4_v:
10021 case NEON::BI__builtin_neon_vqtbl4q_v:
10022 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
10023 case NEON::BI__builtin_neon_vqtbx1_v:
10024 case NEON::BI__builtin_neon_vqtbx1q_v:
10025 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
10026 case NEON::BI__builtin_neon_vqtbx2_v:
10027 case NEON::BI__builtin_neon_vqtbx2q_v:
10028 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
10029 case NEON::BI__builtin_neon_vqtbx3_v:
10030 case NEON::BI__builtin_neon_vqtbx3q_v:
10031 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
10032 case NEON::BI__builtin_neon_vqtbx4_v:
10033 case NEON::BI__builtin_neon_vqtbx4q_v:
10034 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
10035 }
10036 }
10037
10038 if (!Int)
10039 return nullptr;
10040
10041 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
10042 return CGF.EmitNeonCall(F, Ops, s);
10043}
10044
10045Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
10046 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
10047 Op = Builder.CreateBitCast(Op, Int16Ty);
10048 Value *V = PoisonValue::get(VTy);
10049 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
10050 Op = Builder.CreateInsertElement(V, Op, CI);
10051 return Op;
10052}
10053
10054/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10055/// access builtin. Only required if it can't be inferred from the base pointer
10056/// operand.
10057llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10058 switch (TypeFlags.getMemEltType()) {
10059 case SVETypeFlags::MemEltTyDefault:
10060 return getEltType(TypeFlags);
10061 case SVETypeFlags::MemEltTyInt8:
10062 return Builder.getInt8Ty();
10063 case SVETypeFlags::MemEltTyInt16:
10064 return Builder.getInt16Ty();
10065 case SVETypeFlags::MemEltTyInt32:
10066 return Builder.getInt32Ty();
10067 case SVETypeFlags::MemEltTyInt64:
10068 return Builder.getInt64Ty();
10069 }
10070 llvm_unreachable("Unknown MemEltType");
10071}
10072
10073llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10074 switch (TypeFlags.getEltType()) {
10075 default:
10076 llvm_unreachable("Invalid SVETypeFlag!");
10077
10078 case SVETypeFlags::EltTyInt8:
10079 return Builder.getInt8Ty();
10080 case SVETypeFlags::EltTyInt16:
10081 return Builder.getInt16Ty();
10082 case SVETypeFlags::EltTyInt32:
10083 return Builder.getInt32Ty();
10084 case SVETypeFlags::EltTyInt64:
10085 return Builder.getInt64Ty();
10086 case SVETypeFlags::EltTyInt128:
10087 return Builder.getInt128Ty();
10088
10089 case SVETypeFlags::EltTyFloat16:
10090 return Builder.getHalfTy();
10091 case SVETypeFlags::EltTyFloat32:
10092 return Builder.getFloatTy();
10093 case SVETypeFlags::EltTyFloat64:
10094 return Builder.getDoubleTy();
10095
10096 case SVETypeFlags::EltTyBFloat16:
10097 return Builder.getBFloatTy();
10098
10099 case SVETypeFlags::EltTyBool8:
10100 case SVETypeFlags::EltTyBool16:
10101 case SVETypeFlags::EltTyBool32:
10102 case SVETypeFlags::EltTyBool64:
10103 return Builder.getInt1Ty();
10104 }
10105}
10106
10107// Return the llvm predicate vector type corresponding to the specified element
10108// TypeFlags.
10109 llvm::ScalableVectorType *
10110 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10111 switch (TypeFlags.getEltType()) {
10112 default: llvm_unreachable("Unhandled SVETypeFlag!");
10113
10114 case SVETypeFlags::EltTyInt8:
10115 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10116 case SVETypeFlags::EltTyInt16:
10117 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10118 case SVETypeFlags::EltTyInt32:
10119 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10120 case SVETypeFlags::EltTyInt64:
10121 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10122
10123 case SVETypeFlags::EltTyBFloat16:
10124 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10125 case SVETypeFlags::EltTyFloat16:
10126 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10127 case SVETypeFlags::EltTyFloat32:
10128 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10129 case SVETypeFlags::EltTyFloat64:
10130 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10131
10132 case SVETypeFlags::EltTyBool8:
10133 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10134 case SVETypeFlags::EltTyBool16:
10135 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10136 case SVETypeFlags::EltTyBool32:
10137 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10138 case SVETypeFlags::EltTyBool64:
10139 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10140 }
10141}
10142
10143// Return the llvm vector type corresponding to the specified element TypeFlags.
10144llvm::ScalableVectorType *
10145CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10146 switch (TypeFlags.getEltType()) {
10147 default:
10148 llvm_unreachable("Invalid SVETypeFlag!");
10149
10150 case SVETypeFlags::EltTyInt8:
10151 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10152 case SVETypeFlags::EltTyInt16:
10153 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10154 case SVETypeFlags::EltTyInt32:
10155 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10156 case SVETypeFlags::EltTyInt64:
10157 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10158
10159 case SVETypeFlags::EltTyMFloat8:
10160 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10161 case SVETypeFlags::EltTyFloat16:
10162 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10163 case SVETypeFlags::EltTyBFloat16:
10164 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10165 case SVETypeFlags::EltTyFloat32:
10166 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10167 case SVETypeFlags::EltTyFloat64:
10168 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10169
10170 case SVETypeFlags::EltTyBool8:
10171 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10172 case SVETypeFlags::EltTyBool16:
10173 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10174 case SVETypeFlags::EltTyBool32:
10175 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10176 case SVETypeFlags::EltTyBool64:
10177 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10178 }
10179}
10180
10181 llvm::Value *
10182 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10183 Function *Ptrue =
10184 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10185 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10186}
10187
10188constexpr unsigned SVEBitsPerBlock = 128;
10189
10190static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10191 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10192 return llvm::ScalableVectorType::get(EltTy, NumElts);
10193}
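// For example (illustrative; Int8Ty etc. stand for the corresponding LLVM
// element types), since SVEBitsPerBlock is 128:
//
//   getSVEVectorForElementType(Int8Ty);   // <vscale x 16 x i8>
//   getSVEVectorForElementType(Int32Ty);  // <vscale x 4 x i32>
//   getSVEVectorForElementType(DoubleTy); // <vscale x 2 x double>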
10194
10195// Reinterpret the input predicate so that it can be used to correctly isolate
10196// the elements of the specified datatype.
10197 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10198 llvm::ScalableVectorType *VTy) {
10199
10200 if (isa<TargetExtType>(Pred->getType()) &&
10201 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10202 return Pred;
10203
10204 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10205 if (Pred->getType() == RTy)
10206 return Pred;
10207
10208 unsigned IntID;
10209 llvm::Type *IntrinsicTy;
10210 switch (VTy->getMinNumElements()) {
10211 default:
10212 llvm_unreachable("unsupported element count!");
10213 case 1:
10214 case 2:
10215 case 4:
10216 case 8:
10217 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10218 IntrinsicTy = RTy;
10219 break;
10220 case 16:
10221 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10222 IntrinsicTy = Pred->getType();
10223 break;
10224 }
10225
10226 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10227 Value *C = Builder.CreateCall(F, Pred);
10228 assert(C->getType() == RTy && "Unexpected return type!");
10229 return C;
10230}
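// For example (illustrative): an ACLE svbool_t predicate arrives as
// <vscale x 16 x i1>. When it guards a <vscale x 2 x double> operation, this
// cast emits llvm.aarch64.sve.convert.from.svbool to narrow it to
// <vscale x 2 x i1>; casting back to the svbool_t layout uses
// llvm.aarch64.sve.convert.to.svbool, as in:
//
//   Pred = EmitSVEPredicateCast(Pred, getSVEType(TypeFlags));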
10231
10232 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10233 llvm::StructType *Ty) {
10234 if (PredTuple->getType() == Ty)
10235 return PredTuple;
10236
10237 Value *Ret = llvm::PoisonValue::get(Ty);
10238 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10239 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10240 Pred = EmitSVEPredicateCast(
10241 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10242 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10243 }
10244
10245 return Ret;
10246}
10247
10248 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10249 SmallVectorImpl<Value *> &Ops,
10250 unsigned IntID) {
10251 auto *ResultTy = getSVEType(TypeFlags);
10252 auto *OverloadedTy =
10253 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10254
10255 Function *F = nullptr;
10256 if (Ops[1]->getType()->isVectorTy())
10257 // This is the "vector base, scalar offset" case. In order to uniquely
10258 // map this built-in to an LLVM IR intrinsic, we need both the return type
10259 // and the type of the vector base.
10260 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10261 else
10262 // This is the "scalar base, vector offset case". The type of the offset
10263 // is encoded in the name of the intrinsic. We only need to specify the
10264 // return type in order to uniquely map this built-in to an LLVM IR
10265 // intrinsic.
10266 F = CGM.getIntrinsic(IntID, OverloadedTy);
10267
10268 // At the ACLE level there's only one predicate type, svbool_t, which is
10269 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10270 // actual type being loaded. For example, when loading doubles (i64) the
10271 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10272 // the predicate and the data being loaded must match. Cast to the type
10273 // expected by the intrinsic. The intrinsic itself should be defined in
10274 // a way that enforces relations between parameter types.
10275 Ops[0] = EmitSVEPredicateCast(
10276 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10277
10278 // Pass 0 when the offset is missing. This can only be applied when using
10279 // the "vector base" addressing mode for which ACLE allows no offset. The
10280 // corresponding LLVM IR always requires an offset.
10281 if (Ops.size() == 2) {
10282 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10283 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10284 }
10285
10286 // For "vector base, scalar index" scale the index so that it becomes a
10287 // scalar offset.
10288 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10289 unsigned BytesPerElt =
10290 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10291 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10292 }
10293
10294 Value *Call = Builder.CreateCall(F, Ops);
10295
10296 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10297 // other cases it's folded into a nop.
10298 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10299 : Builder.CreateSExt(Call, ResultTy);
10300}
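// For example (illustrative): a gather that takes a vector of base addresses
// plus a scalar element index over i32 data reaches the scaling step above
// with BytesPerElt == 4, so the index is converted into a byte offset with
//
//   Ops[2] = Builder.CreateShl(Ops[2], Log2_32(4)); // index * 4 bytes
//
// before the intrinsic call.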
10301
10302 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10303 SmallVectorImpl<Value *> &Ops,
10304 unsigned IntID) {
10305 auto *SrcDataTy = getSVEType(TypeFlags);
10306 auto *OverloadedTy =
10307 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10308
10309 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10310 // it's the first argument. Move it accordingly.
10311 Ops.insert(Ops.begin(), Ops.pop_back_val());
10312
10313 Function *F = nullptr;
10314 if (Ops[2]->getType()->isVectorTy())
10315 // This is the "vector base, scalar offset" case. In order to uniquely
10316 // map this built-in to an LLVM IR intrinsic, we need both the return type
10317 // and the type of the vector base.
10318 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10319 else
10320 // This is the "scalar base, vector offset case". The type of the offset
10321 // is encoded in the name of the intrinsic. We only need to specify the
10322 // return type in order to uniquely map this built-in to an LLVM IR
10323 // intrinsic.
10324 F = CGM.getIntrinsic(IntID, OverloadedTy);
10325
10326 // Pass 0 when the offset is missing. This can only be applied when using
10327 // the "vector base" addressing mode for which ACLE allows no offset. The
10328 // corresponding LLVM IR always requires an offset.
10329 if (Ops.size() == 3) {
10330 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10331 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10332 }
10333
10334 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10335 // folded into a nop.
10336 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10337
10338 // At the ACLE level there's only one predicate type, svbool_t, which is
10339 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10340 // actual type being stored. For example, when storing doubles (i64) the
10341 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10342 // the predicate and the data being stored must match. Cast to the type
10343 // expected by the intrinsic. The intrinsic itself should be defined in
10344 // a way that enforces relations between parameter types.
10345 Ops[1] = EmitSVEPredicateCast(
10346 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10347
10348 // For "vector base, scalar index" scale the index so that it becomes a
10349 // scalar offset.
10350 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10351 unsigned BytesPerElt =
10352 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10353 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10354 }
10355
10356 return Builder.CreateCall(F, Ops);
10357}
10358
10359 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10360 SmallVectorImpl<Value *> &Ops,
10361 unsigned IntID) {
10362 // The gather prefetches are overloaded on the vector input - this can either
10363 // be the vector of base addresses or vector of offsets.
10364 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10365 if (!OverloadedTy)
10366 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10367
10368 // Cast the predicate from svbool_t to the right number of elements.
10369 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10370
10371 // vector + imm addressing modes
10372 if (Ops[1]->getType()->isVectorTy()) {
10373 if (Ops.size() == 3) {
10374 // Pass 0 for 'vector+imm' when the index is omitted.
10375 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10376
10377 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10378 std::swap(Ops[2], Ops[3]);
10379 } else {
10380 // Index needs to be passed as scaled offset.
10381 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10382 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10383 if (BytesPerElt > 1)
10384 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10385 }
10386 }
10387
10388 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10389 return Builder.CreateCall(F, Ops);
10390}
10391
10392 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10393 SmallVectorImpl<Value *> &Ops,
10394 unsigned IntID) {
10395 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10396 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10397 Value *BasePtr = Ops[1];
10398
10399 // Does the load have an offset?
10400 if (Ops.size() > 2)
10401 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10402
10403 Function *F = CGM.getIntrinsic(IntID, {VTy});
10404 return Builder.CreateCall(F, {Predicate, BasePtr});
10405}
10406
10407 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10408 SmallVectorImpl<Value *> &Ops,
10409 unsigned IntID) {
10410 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10411
10412 unsigned N;
10413 switch (IntID) {
10414 case Intrinsic::aarch64_sve_st2:
10415 case Intrinsic::aarch64_sve_st1_pn_x2:
10416 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10417 case Intrinsic::aarch64_sve_st2q:
10418 N = 2;
10419 break;
10420 case Intrinsic::aarch64_sve_st3:
10421 case Intrinsic::aarch64_sve_st3q:
10422 N = 3;
10423 break;
10424 case Intrinsic::aarch64_sve_st4:
10425 case Intrinsic::aarch64_sve_st1_pn_x4:
10426 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10427 case Intrinsic::aarch64_sve_st4q:
10428 N = 4;
10429 break;
10430 default:
10431 llvm_unreachable("unknown intrinsic!");
10432 }
10433
10434 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10435 Value *BasePtr = Ops[1];
10436
10437 // Does the store have an offset?
10438 if (Ops.size() > (2 + N))
10439 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10440
10441 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10442 // need to break up the tuple vector.
10443 SmallVector<llvm::Value *, 5> Operands;
10444 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10445 Operands.push_back(Ops[I]);
10446 Operands.append({Predicate, BasePtr});
10447 Function *F = CGM.getIntrinsic(IntID, { VTy });
10448
10449 return Builder.CreateCall(F, Operands);
10450}
10451
10452// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10453// svpmullt_pair intrinsics, with the exception that their results are bitcast
10454// to a wider type.
10455 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10456 SmallVectorImpl<Value *> &Ops,
10457 unsigned BuiltinID) {
10458 // Splat scalar operand to vector (intrinsics with _n infix)
10459 if (TypeFlags.hasSplatOperand()) {
10460 unsigned OpNo = TypeFlags.getSplatOperand();
10461 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10462 }
10463
10464 // The pair-wise function has a narrower overloaded type.
10465 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10466 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10467
10468 // Now bitcast to the wider result type.
10469 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10470 return EmitSVEReinterpret(Call, Ty);
10471}
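// For example (illustrative): svpmullb_u16 takes two svuint8_t operands and
// returns svuint16_t. The call above is emitted on the narrow
// <vscale x 16 x i8> type via llvm.aarch64.sve.pmullb.pair and the result is
// then reinterpreted as the wider <vscale x 8 x i16> return type, e.g.
//
//   svuint16_t r = svpmullb_u16(a, b); // a, b: svuint8_t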
10472
10473 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10474 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10475 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10476 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10477 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10478}
10479
10480 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10481 SmallVectorImpl<Value *> &Ops,
10482 unsigned BuiltinID) {
10483 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10484 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10485 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10486
10487 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10488 Value *BasePtr = Ops[1];
10489
10490 // Implement the index operand if not omitted.
10491 if (Ops.size() > 3)
10492 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10493
10494 Value *PrfOp = Ops.back();
10495
10496 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10497 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10498}
10499
10500 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10501 llvm::Type *ReturnTy,
10502 SmallVectorImpl<Value *> &Ops,
10503 unsigned IntrinsicID,
10504 bool IsZExtReturn) {
10505 QualType LangPTy = E->getArg(1)->getType();
10506 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10507 LangPTy->castAs<PointerType>()->getPointeeType());
10508
10509 // The vector type that is returned may be different from the
10510 // eventual type loaded from memory.
10511 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10512 llvm::ScalableVectorType *MemoryTy = nullptr;
10513 llvm::ScalableVectorType *PredTy = nullptr;
10514 bool IsQuadLoad = false;
10515 switch (IntrinsicID) {
10516 case Intrinsic::aarch64_sve_ld1uwq:
10517 case Intrinsic::aarch64_sve_ld1udq:
10518 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10519 PredTy = llvm::ScalableVectorType::get(
10520 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10521 IsQuadLoad = true;
10522 break;
10523 default:
10524 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10525 PredTy = MemoryTy;
10526 break;
10527 }
10528
10529 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10530 Value *BasePtr = Ops[1];
10531
10532 // Does the load have an offset?
10533 if (Ops.size() > 2)
10534 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10535
10536 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10537 auto *Load =
10538 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10539 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10540 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10541
10542 if (IsQuadLoad)
10543 return Load;
10544
10545 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10546 : Builder.CreateSExt(Load, VectorTy);
10547}
10548
10549 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10550 SmallVectorImpl<Value *> &Ops,
10551 unsigned IntrinsicID) {
10552 QualType LangPTy = E->getArg(1)->getType();
10553 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10554 LangPTy->castAs<PointerType>()->getPointeeType());
10555
10556 // The vector type that is stored may be different from the
10557 // eventual type stored to memory.
10558 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10559 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10560
10561 auto PredTy = MemoryTy;
10562 auto AddrMemoryTy = MemoryTy;
10563 bool IsQuadStore = false;
10564
10565 switch (IntrinsicID) {
10566 case Intrinsic::aarch64_sve_st1wq:
10567 case Intrinsic::aarch64_sve_st1dq:
10568 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10569 PredTy =
10570 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10571 IsQuadStore = true;
10572 break;
10573 default:
10574 break;
10575 }
10576 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10577 Value *BasePtr = Ops[1];
10578
10579 // Does the store have an offset?
10580 if (Ops.size() == 4)
10581 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10582
10583 // Last value is always the data
10584 Value *Val =
10585 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10586
10587 Function *F =
10588 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10589 auto *Store =
10590 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10591 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10592 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10593 return Store;
10594}
10595
10596 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10597 SmallVectorImpl<Value *> &Ops,
10598 unsigned IntID) {
10599 Ops[2] = EmitSVEPredicateCast(
10600 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10601
10602 SmallVector<Value *> NewOps;
10603 NewOps.push_back(Ops[2]);
10604
10605 llvm::Value *BasePtr = Ops[3];
10606 llvm::Value *RealSlice = Ops[1];
10607 // If the intrinsic contains the vnum parameter, multiply it by the vector
10608 // size in bytes.
10609 if (Ops.size() == 5) {
10610 Function *StreamingVectorLength =
10611 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10612 llvm::Value *StreamingVectorLengthCall =
10613 Builder.CreateCall(StreamingVectorLength);
10614 llvm::Value *Mulvl =
10615 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10616 // The type of the ptr parameter is void *, so use Int8Ty here.
10617 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10618 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10619 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10620 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10621 }
10622 NewOps.push_back(BasePtr);
10623 NewOps.push_back(Ops[0]);
10624 NewOps.push_back(RealSlice);
10625 Function *F = CGM.getIntrinsic(IntID);
10626 return Builder.CreateCall(F, NewOps);
10627}
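// For example (illustrative): for the _vnum forms the code above computes the
// effective address as base + vnum * svcntsb() bytes and the effective tile
// slice as slice + vnum, matching the GEP/add/trunc sequence:
//
//   BasePtr   = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl); // base + vnum * VL
//   RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);     // slice + vnum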
10628
10629 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10630 SmallVectorImpl<Value *> &Ops,
10631 unsigned IntID) {
10632 auto *VecTy = getSVEType(TypeFlags);
10633 Function *F = CGM.getIntrinsic(IntID, VecTy);
10634 if (TypeFlags.isReadZA())
10635 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10636 else if (TypeFlags.isWriteZA())
10637 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10638 return Builder.CreateCall(F, Ops);
10639}
10640
10641 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10642 SmallVectorImpl<Value *> &Ops,
10643 unsigned IntID) {
10644 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
10645 if (Ops.size() == 0)
10646 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10647 Function *F = CGM.getIntrinsic(IntID, {});
10648 return Builder.CreateCall(F, Ops);
10649}
10650
10651 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10652 SmallVectorImpl<Value *> &Ops,
10653 unsigned IntID) {
10654 if (Ops.size() == 2)
10655 Ops.push_back(Builder.getInt32(0));
10656 else
10657 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10658 Function *F = CGM.getIntrinsic(IntID, {});
10659 return Builder.CreateCall(F, Ops);
10660}
10661
10662// Limit the usage of scalable llvm IR generated by the ACLE by using the
10663// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10664Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10665 return Builder.CreateVectorSplat(
10666 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10667}
10668
10669 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10670 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10671#ifndef NDEBUG
10672 auto *VecTy = cast<llvm::VectorType>(Ty);
10673 ElementCount EC = VecTy->getElementCount();
10674 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10675 "Only <1 x i8> expected");
10676#endif
10677 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10678 }
10679 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10680}
10681
10682Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10683 // FIXME: For big endian this needs an additional REV, or needs a separate
10684 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10685 // instruction is defined as 'bitwise' equivalent from memory point of
10686 // view (when storing/reloading), whereas the svreinterpret builtin
10687 // implements bitwise equivalent cast from register point of view.
10688 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10689
10690 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10691 Value *Tuple = llvm::PoisonValue::get(Ty);
10692
10693 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10694 Value *In = Builder.CreateExtractValue(Val, I);
10695 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10696 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10697 }
10698
10699 return Tuple;
10700 }
10701
10702 return Builder.CreateBitCast(Val, Ty);
10703}
10704
10705static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10706 SmallVectorImpl<Value *> &Ops) {
10707 auto *SplatZero = Constant::getNullValue(Ty);
10708 Ops.insert(Ops.begin(), SplatZero);
10709}
10710
10711static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10712 SmallVectorImpl<Value *> &Ops) {
10713 auto *SplatUndef = UndefValue::get(Ty);
10714 Ops.insert(Ops.begin(), SplatUndef);
10715}
10716
10717 SmallVector<llvm::Type *, 2>
10718 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10719 llvm::Type *ResultType,
10720 ArrayRef<Value *> Ops) {
10721 if (TypeFlags.isOverloadNone())
10722 return {};
10723
10724 llvm::Type *DefaultType = getSVEType(TypeFlags);
10725
10726 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10727 return {DefaultType, Ops[1]->getType()};
10728
10729 if (TypeFlags.isOverloadWhileRW())
10730 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10731
10732 if (TypeFlags.isOverloadCvt())
10733 return {Ops[0]->getType(), Ops.back()->getType()};
10734
10735 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10736 ResultType->isVectorTy())
10737 return {ResultType, Ops[1]->getType()};
10738
10739 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10740 return {DefaultType};
10741}
10742
10743 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10744 ArrayRef<Value *> Ops) {
10745 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10746 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10747 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10748
10749 if (TypeFlags.isTupleSet())
10750 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10751 return Builder.CreateExtractValue(Ops[0], Idx);
10752}
10753
10754 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10755 llvm::Type *Ty,
10756 ArrayRef<Value *> Ops) {
10757 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10758
10759 Value *Tuple = llvm::PoisonValue::get(Ty);
10760 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10761 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10762
10763 return Tuple;
10764}
10765
10766 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10767 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10768 SVETypeFlags TypeFlags) {
10769 // Find out if any arguments are required to be integer constant expressions.
10770 unsigned ICEArguments = 0;
10771 ASTContext::GetBuiltinTypeError Error;
10772 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10773 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10774
10775 // Tuple set/get only requires one insert/extract vector, which is
10776 // created by EmitSVETupleSetOrGet.
10777 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10778
10779 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10780 bool IsICE = ICEArguments & (1 << i);
10781 Value *Arg = EmitScalarExpr(E->getArg(i));
10782
10783 if (IsICE) {
10784 // If this is required to be a constant, constant fold it so that we know
10785 // that the generated intrinsic gets a ConstantInt.
10786 std::optional<llvm::APSInt> Result =
10787 E->getArg(i)->getIntegerConstantExpr(getContext());
10788 assert(Result && "Expected argument to be a constant");
10789
10790 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10791 // truncate because the immediate has been range checked and no valid
10792 // immediate requires more than a handful of bits.
10793 *Result = Result->extOrTrunc(32);
10794 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10795 continue;
10796 }
10797
10798 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10799 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10800 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10801
10802 continue;
10803 }
10804
10805 Ops.push_back(Arg);
10806 }
10807}
10808
10809 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10810 const CallExpr *E) {
10811 llvm::Type *Ty = ConvertType(E->getType());
10812 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10813 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10814 Value *Val = EmitScalarExpr(E->getArg(0));
10815 return EmitSVEReinterpret(Val, Ty);
10816 }
10817
10818 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10819 AArch64SVEIntrinsicsProvenSorted);
10820
10821 llvm::SmallVector<Value *, 4> Ops;
10822 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10823 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10824
10825 if (TypeFlags.isLoad())
10826 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10827 TypeFlags.isZExtReturn());
10828 else if (TypeFlags.isStore())
10829 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10830 else if (TypeFlags.isGatherLoad())
10831 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10832 else if (TypeFlags.isScatterStore())
10833 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10834 else if (TypeFlags.isPrefetch())
10835 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10836 else if (TypeFlags.isGatherPrefetch())
10837 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10838 else if (TypeFlags.isStructLoad())
10839 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10840 else if (TypeFlags.isStructStore())
10841 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10842 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10843 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10844 else if (TypeFlags.isTupleCreate())
10845 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10846 else if (TypeFlags.isUndef())
10847 return UndefValue::get(Ty);
10848 else if (Builtin->LLVMIntrinsic != 0) {
10849 // Emit set FPMR for intrinsics that require it
10850 if (TypeFlags.setsFPMR())
10851 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10852 Ops.pop_back_val());
10853 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10854 InsertExplicitZeroOperand(Builder, Ty, Ops);
10855
10856 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10857 InsertExplicitUndefOperand(Builder, Ty, Ops);
10858
10859 // Some ACLE builtins leave out the argument to specify the predicate
10860 // pattern, which is expected to be expanded to an SV_ALL pattern.
10861 if (TypeFlags.isAppendSVALL())
10862 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10863 if (TypeFlags.isInsertOp1SVALL())
10864 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10865
10866 // Predicates must match the main datatype.
10867 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10868 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10869 if (PredTy->getElementType()->isIntegerTy(1))
10870 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10871
10872 // Splat scalar operand to vector (intrinsics with _n infix)
10873 if (TypeFlags.hasSplatOperand()) {
10874 unsigned OpNo = TypeFlags.getSplatOperand();
10875 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10876 }
10877
10878 if (TypeFlags.isReverseCompare())
10879 std::swap(Ops[1], Ops[2]);
10880 else if (TypeFlags.isReverseUSDOT())
10881 std::swap(Ops[1], Ops[2]);
10882 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10883 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10884 std::swap(Ops[1], Ops[2]);
10885 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10886 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10887 std::swap(Ops[1], Ops[3]);
10888
10889 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10890 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10891 llvm::Type *OpndTy = Ops[1]->getType();
10892 auto *SplatZero = Constant::getNullValue(OpndTy);
10893 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10894 }
10895
10896 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10897 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10898 Value *Call = Builder.CreateCall(F, Ops);
10899
10900 if (Call->getType() == Ty)
10901 return Call;
10902
10903 // Predicate results must be converted to svbool_t.
10904 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10905 return EmitSVEPredicateCast(Call, PredTy);
10906 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10907 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10908
10909 llvm_unreachable("unsupported element count!");
10910 }
10911
10912 switch (BuiltinID) {
10913 default:
10914 return nullptr;
10915
10916 case SVE::BI__builtin_sve_svreinterpret_b: {
10917 auto SVCountTy =
10918 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10919 Function *CastFromSVCountF =
10920 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10921 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10922 }
10923 case SVE::BI__builtin_sve_svreinterpret_c: {
10924 auto SVCountTy =
10925 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10926 Function *CastToSVCountF =
10927 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10928 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10929 }
10930
10931 case SVE::BI__builtin_sve_svpsel_lane_b8:
10932 case SVE::BI__builtin_sve_svpsel_lane_b16:
10933 case SVE::BI__builtin_sve_svpsel_lane_b32:
10934 case SVE::BI__builtin_sve_svpsel_lane_b64:
10935 case SVE::BI__builtin_sve_svpsel_lane_c8:
10936 case SVE::BI__builtin_sve_svpsel_lane_c16:
10937 case SVE::BI__builtin_sve_svpsel_lane_c32:
10938 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10939 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10940 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10941 "aarch64.svcount")) &&
10942 "Unexpected TargetExtType");
10943 auto SVCountTy =
10944 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10945 Function *CastFromSVCountF =
10946 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10947 Function *CastToSVCountF =
10948 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10949
10950 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10951 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10952 llvm::Value *Ops0 =
10953 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10954 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10955 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10956 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10957 }
10958 case SVE::BI__builtin_sve_svmov_b_z: {
10959 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10960 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10961 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10962 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10963 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10964 }
10965
10966 case SVE::BI__builtin_sve_svnot_b_z: {
10967 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10968 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10969 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10970 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10971 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10972 }
10973
10974 case SVE::BI__builtin_sve_svmovlb_u16:
10975 case SVE::BI__builtin_sve_svmovlb_u32:
10976 case SVE::BI__builtin_sve_svmovlb_u64:
10977 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10978
10979 case SVE::BI__builtin_sve_svmovlb_s16:
10980 case SVE::BI__builtin_sve_svmovlb_s32:
10981 case SVE::BI__builtin_sve_svmovlb_s64:
10982 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10983
10984 case SVE::BI__builtin_sve_svmovlt_u16:
10985 case SVE::BI__builtin_sve_svmovlt_u32:
10986 case SVE::BI__builtin_sve_svmovlt_u64:
10987 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10988
10989 case SVE::BI__builtin_sve_svmovlt_s16:
10990 case SVE::BI__builtin_sve_svmovlt_s32:
10991 case SVE::BI__builtin_sve_svmovlt_s64:
10992 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10993
10994 case SVE::BI__builtin_sve_svpmullt_u16:
10995 case SVE::BI__builtin_sve_svpmullt_u64:
10996 case SVE::BI__builtin_sve_svpmullt_n_u16:
10997 case SVE::BI__builtin_sve_svpmullt_n_u64:
10998 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10999
11000 case SVE::BI__builtin_sve_svpmullb_u16:
11001 case SVE::BI__builtin_sve_svpmullb_u64:
11002 case SVE::BI__builtin_sve_svpmullb_n_u16:
11003 case SVE::BI__builtin_sve_svpmullb_n_u64:
11004 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
11005
11006 case SVE::BI__builtin_sve_svdup_n_b8:
11007 case SVE::BI__builtin_sve_svdup_n_b16:
11008 case SVE::BI__builtin_sve_svdup_n_b32:
11009 case SVE::BI__builtin_sve_svdup_n_b64: {
11010 Value *CmpNE =
11011 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
11012 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
11013 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
11014 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
11015 }
11016
11017 case SVE::BI__builtin_sve_svdupq_n_b8:
11018 case SVE::BI__builtin_sve_svdupq_n_b16:
11019 case SVE::BI__builtin_sve_svdupq_n_b32:
11020 case SVE::BI__builtin_sve_svdupq_n_b64:
11021 case SVE::BI__builtin_sve_svdupq_n_u8:
11022 case SVE::BI__builtin_sve_svdupq_n_s8:
11023 case SVE::BI__builtin_sve_svdupq_n_u64:
11024 case SVE::BI__builtin_sve_svdupq_n_f64:
11025 case SVE::BI__builtin_sve_svdupq_n_s64:
11026 case SVE::BI__builtin_sve_svdupq_n_u16:
11027 case SVE::BI__builtin_sve_svdupq_n_f16:
11028 case SVE::BI__builtin_sve_svdupq_n_bf16:
11029 case SVE::BI__builtin_sve_svdupq_n_s16:
11030 case SVE::BI__builtin_sve_svdupq_n_u32:
11031 case SVE::BI__builtin_sve_svdupq_n_f32:
11032 case SVE::BI__builtin_sve_svdupq_n_s32: {
11033 // These builtins are implemented by storing each element to an array and using
11034 // ld1rq to materialize a vector.
11035 unsigned NumOpnds = Ops.size();
11036
11037 bool IsBoolTy =
11038 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
11039
11040 // For svdupq_n_b* the element type is an integer of width 128/numelts,
11041 // so that the compare can use the width that is natural for the expected
11042 // number of predicate lanes.
11043 llvm::Type *EltTy = Ops[0]->getType();
11044 if (IsBoolTy)
11045 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
11046
11047 SmallVector<llvm::Value *, 16> VecOps;
11048 for (unsigned I = 0; I < NumOpnds; ++I)
11049 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
11050 Value *Vec = BuildVector(VecOps);
11051
11052 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
11053 Value *InsertSubVec = Builder.CreateInsertVector(
11054 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
11055
11056 Function *F =
11057 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11058 Value *DupQLane =
11059 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11060
11061 if (!IsBoolTy)
11062 return DupQLane;
11063
11064 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11065 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11066
11067 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11068 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11069 : Intrinsic::aarch64_sve_cmpne_wide,
11070 OverloadedTy);
11071 Value *Call = Builder.CreateCall(
11072 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11073 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11074 }
11075
11076 case SVE::BI__builtin_sve_svpfalse_b:
11077 return ConstantInt::getFalse(Ty);
11078
11079 case SVE::BI__builtin_sve_svpfalse_c: {
11080 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11081 Function *CastToSVCountF =
11082 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11083 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11084 }
11085
11086 case SVE::BI__builtin_sve_svlen_bf16:
11087 case SVE::BI__builtin_sve_svlen_f16:
11088 case SVE::BI__builtin_sve_svlen_f32:
11089 case SVE::BI__builtin_sve_svlen_f64:
11090 case SVE::BI__builtin_sve_svlen_s8:
11091 case SVE::BI__builtin_sve_svlen_s16:
11092 case SVE::BI__builtin_sve_svlen_s32:
11093 case SVE::BI__builtin_sve_svlen_s64:
11094 case SVE::BI__builtin_sve_svlen_u8:
11095 case SVE::BI__builtin_sve_svlen_u16:
11096 case SVE::BI__builtin_sve_svlen_u32:
11097 case SVE::BI__builtin_sve_svlen_u64: {
11098 SVETypeFlags TF(Builtin->TypeModifier);
11099 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11100 auto *NumEls =
11101 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11102
11103 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11104 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11105 }
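// For example, svlen_u32(x) operates on a <vscale x 4 x i32> vector, so the
// code above emits mul(i64 4, @llvm.vscale.i64()): the minimum element count
// scaled by the runtime vscale.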
11106
11107 case SVE::BI__builtin_sve_svtbl2_u8:
11108 case SVE::BI__builtin_sve_svtbl2_s8:
11109 case SVE::BI__builtin_sve_svtbl2_u16:
11110 case SVE::BI__builtin_sve_svtbl2_s16:
11111 case SVE::BI__builtin_sve_svtbl2_u32:
11112 case SVE::BI__builtin_sve_svtbl2_s32:
11113 case SVE::BI__builtin_sve_svtbl2_u64:
11114 case SVE::BI__builtin_sve_svtbl2_s64:
11115 case SVE::BI__builtin_sve_svtbl2_f16:
11116 case SVE::BI__builtin_sve_svtbl2_bf16:
11117 case SVE::BI__builtin_sve_svtbl2_f32:
11118 case SVE::BI__builtin_sve_svtbl2_f64: {
11119 SVETypeFlags TF(Builtin->TypeModifier);
11120 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11121 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11122 return Builder.CreateCall(F, Ops);
11123 }
11124
11125 case SVE::BI__builtin_sve_svset_neonq_s8:
11126 case SVE::BI__builtin_sve_svset_neonq_s16:
11127 case SVE::BI__builtin_sve_svset_neonq_s32:
11128 case SVE::BI__builtin_sve_svset_neonq_s64:
11129 case SVE::BI__builtin_sve_svset_neonq_u8:
11130 case SVE::BI__builtin_sve_svset_neonq_u16:
11131 case SVE::BI__builtin_sve_svset_neonq_u32:
11132 case SVE::BI__builtin_sve_svset_neonq_u64:
11133 case SVE::BI__builtin_sve_svset_neonq_f16:
11134 case SVE::BI__builtin_sve_svset_neonq_f32:
11135 case SVE::BI__builtin_sve_svset_neonq_f64:
11136 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11137 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11138 }
11139
11140 case SVE::BI__builtin_sve_svget_neonq_s8:
11141 case SVE::BI__builtin_sve_svget_neonq_s16:
11142 case SVE::BI__builtin_sve_svget_neonq_s32:
11143 case SVE::BI__builtin_sve_svget_neonq_s64:
11144 case SVE::BI__builtin_sve_svget_neonq_u8:
11145 case SVE::BI__builtin_sve_svget_neonq_u16:
11146 case SVE::BI__builtin_sve_svget_neonq_u32:
11147 case SVE::BI__builtin_sve_svget_neonq_u64:
11148 case SVE::BI__builtin_sve_svget_neonq_f16:
11149 case SVE::BI__builtin_sve_svget_neonq_f32:
11150 case SVE::BI__builtin_sve_svget_neonq_f64:
11151 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11152 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11153 }
11154
11155 case SVE::BI__builtin_sve_svdup_neonq_s8:
11156 case SVE::BI__builtin_sve_svdup_neonq_s16:
11157 case SVE::BI__builtin_sve_svdup_neonq_s32:
11158 case SVE::BI__builtin_sve_svdup_neonq_s64:
11159 case SVE::BI__builtin_sve_svdup_neonq_u8:
11160 case SVE::BI__builtin_sve_svdup_neonq_u16:
11161 case SVE::BI__builtin_sve_svdup_neonq_u32:
11162 case SVE::BI__builtin_sve_svdup_neonq_u64:
11163 case SVE::BI__builtin_sve_svdup_neonq_f16:
11164 case SVE::BI__builtin_sve_svdup_neonq_f32:
11165 case SVE::BI__builtin_sve_svdup_neonq_f64:
11166 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11167 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11168 Builder.getInt64(0));
11169 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11170 {Insert, Builder.getInt64(0)});
11171 }
11172 }
11173
11174 // Should not happen.
11175 return nullptr;
11176}
11177
11178 static void swapCommutativeSMEOperands(unsigned BuiltinID,
11179 SmallVectorImpl<Value *> &Ops) {
11180 unsigned MultiVec;
11181 switch (BuiltinID) {
11182 default:
11183 return;
11184 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11185 MultiVec = 1;
11186 break;
11187 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11188 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11189 MultiVec = 2;
11190 break;
11191 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11192 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11193 MultiVec = 4;
11194 break;
11195 }
11196
11197 if (MultiVec > 0)
11198 for (unsigned I = 0; I < MultiVec; ++I)
11199 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11200}
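// For example, for svsudot_za32_s8_vg1x2 (MultiVec == 2) the operand list
// { slice, zn0, zn1, zm0, zm1 } becomes { slice, zm0, zm1, zn0, zn1 }, so the
// signed and unsigned multi-vector operands swap places and the su* builtin
// can be emitted with the corresponding us* intrinsic.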
11201
11202 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11203 const CallExpr *E) {
11204 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11205 AArch64SMEIntrinsicsProvenSorted);
11206
11207 llvm::SmallVector<Value *, 4> Ops;
11208 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11209 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11210
11211 if (TypeFlags.isLoad() || TypeFlags.isStore())
11212 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11213 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11214 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11215 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11216 BuiltinID == SME::BI__builtin_sme_svzero_za)
11217 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11218 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11219 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11220 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11221 BuiltinID == SME::BI__builtin_sme_svstr_za)
11222 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11223
11224 // Emit set FPMR for intrinsics that require it
11225 if (TypeFlags.setsFPMR())
11226 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11227 Ops.pop_back_val());
11228 // Handle builtins which require their multi-vector operands to be swapped
11229 swapCommutativeSMEOperands(BuiltinID, Ops);
11230
11231 // Should not happen!
11232 if (Builtin->LLVMIntrinsic == 0)
11233 return nullptr;
11234
11235 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11236 // If we already know the streaming mode, don't bother with the intrinsic
11237 // and emit a constant instead
11238 const auto *FD = cast<FunctionDecl>(CurFuncDecl);
11239 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
11240 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11241 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
11242 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
11243 return ConstantInt::getBool(Builder.getContext(), IsStreaming);
11244 }
11245 }
11246 }
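// E.g. inside a function declared __arm_streaming, __arm_in_streaming_mode()
// folds to the constant true here; only streaming-compatible functions fall
// through and query the streaming state at run time via the generic intrinsic
// call emitted below.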
11247
11248 // Predicates must match the main datatype.
11249 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11250 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11251 if (PredTy->getElementType()->isIntegerTy(1))
11252 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11253
11254 Function *F =
11255 TypeFlags.isOverloadNone()
11256 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11257 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11258
11259 return Builder.CreateCall(F, Ops);
11260}
11261
11262 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11263 const CallExpr *E,
11264 llvm::Triple::ArchType Arch) {
11265 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11266 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11267 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11268
11269 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11270 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11271 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11272
11273 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11274 return EmitAArch64CpuSupports(E);
11275
11276 unsigned HintID = static_cast<unsigned>(-1);
11277 switch (BuiltinID) {
11278 default: break;
11279 case clang::AArch64::BI__builtin_arm_nop:
11280 HintID = 0;
11281 break;
11282 case clang::AArch64::BI__builtin_arm_yield:
11283 case clang::AArch64::BI__yield:
11284 HintID = 1;
11285 break;
11286 case clang::AArch64::BI__builtin_arm_wfe:
11287 case clang::AArch64::BI__wfe:
11288 HintID = 2;
11289 break;
11290 case clang::AArch64::BI__builtin_arm_wfi:
11291 case clang::AArch64::BI__wfi:
11292 HintID = 3;
11293 break;
11294 case clang::AArch64::BI__builtin_arm_sev:
11295 case clang::AArch64::BI__sev:
11296 HintID = 4;
11297 break;
11298 case clang::AArch64::BI__builtin_arm_sevl:
11299 case clang::AArch64::BI__sevl:
11300 HintID = 5;
11301 break;
11302 }
11303
11304 if (HintID != static_cast<unsigned>(-1)) {
11305 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11306 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11307 }
11308
11309 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11310 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11311 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11312 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11313 }
11314
11315 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11316 // Create call to __arm_sme_state and store the results to the two pointers.
11317 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11318 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11319 false),
11320 "__arm_sme_state"));
11321 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11322 "aarch64_pstate_sm_compatible");
11323 CI->setAttributes(Attrs);
11324 CI->setCallingConv(
11325 llvm::CallingConv::
11326 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11327 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11328 EmitPointerWithAlignment(E->getArg(0)));
11329 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11330 EmitPointerWithAlignment(E->getArg(1)));
11331 }
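// Sketch of what the block above emits: __arm_sme_state() returns an
// { i64, i64 } pair; element 0 is stored through arg 0 and element 1 through
// arg 1, and the call uses the SME ABI support-routine calling convention,
// which preserves registers from X2 upwards.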
11332
11333 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11334 assert((getContext().getTypeSize(E->getType()) == 32) &&
11335 "rbit of unusual size!");
11336 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11337 return Builder.CreateCall(
11338 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11339 }
11340 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11341 assert((getContext().getTypeSize(E->getType()) == 64) &&
11342 "rbit of unusual size!");
11343 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11344 return Builder.CreateCall(
11345 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11346 }
11347
11348 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11349 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11350 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11351 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11352 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11353 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11354 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11355 return Res;
11356 }
11357
11358 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11359 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11360 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11361 "cls");
11362 }
11363 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11364 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11365 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11366 "cls");
11367 }
11368
11369 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11370 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11371 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11372 llvm::Type *Ty = Arg->getType();
11373 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11374 Arg, "frint32z");
11375 }
11376
11377 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11378 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11379 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11380 llvm::Type *Ty = Arg->getType();
11381 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11382 Arg, "frint64z");
11383 }
11384
11385 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11386 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11387 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11388 llvm::Type *Ty = Arg->getType();
11389 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11390 Arg, "frint32x");
11391 }
11392
11393 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11394 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11395 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11396 llvm::Type *Ty = Arg->getType();
11397 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11398 Arg, "frint64x");
11399 }
11400
11401 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11402 assert((getContext().getTypeSize(E->getType()) == 32) &&
11403 "__jcvt of unusual size!");
11404 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11405 return Builder.CreateCall(
11406 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11407 }
11408
11409 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11410 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11411 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11412 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11413 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11414 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11415
11416 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11417 // Load from the address via an LLVM intrinsic, receiving a
11418 // tuple of 8 i64 words, and store each one to ValPtr.
11419 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11420 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11421 llvm::Value *ToRet;
11422 for (size_t i = 0; i < 8; i++) {
11423 llvm::Value *ValOffsetPtr =
11424 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11425 Address Addr =
11426 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11427 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11428 }
11429 return ToRet;
11430 } else {
11431 // Load 8 i64 words from ValPtr, and store them to the address
11432 // via an LLVM intrinsic.
11433 SmallVector<llvm::Value *, 9> Args;
11434 Args.push_back(MemAddr);
11435 for (size_t i = 0; i < 8; i++) {
11436 llvm::Value *ValOffsetPtr =
11437 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11438 Address Addr =
11439 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11440 Args.push_back(Builder.CreateLoad(Addr));
11441 }
11442
11443 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11444 ? Intrinsic::aarch64_st64b
11445 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11446 ? Intrinsic::aarch64_st64bv
11447 : Intrinsic::aarch64_st64bv0);
11448 Function *F = CGM.getIntrinsic(Intr);
11449 return Builder.CreateCall(F, Args);
11450 }
11451 }
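// Shape of the lowering above, roughly:
//   ld64b:  %tuple = call @llvm.aarch64.ld64b(ptr %src)   ; 8 x i64 result
//           store extractvalue(%tuple, i) to %dst + 8*i   ; i = 0..7
//   st64b*: load 8 i64 words from the value pointer and pass them, together
//           with the destination address, to the matching intrinsic.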
11452
11453 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11454 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11455
11456 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11457 ? Intrinsic::aarch64_rndr
11458 : Intrinsic::aarch64_rndrrs);
11459 Function *F = CGM.getIntrinsic(Intr);
11460 llvm::Value *Val = Builder.CreateCall(F);
11461 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11462 Value *Status = Builder.CreateExtractValue(Val, 1);
11463
11464 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11465 Builder.CreateStore(RandomValue, MemAddress);
11466 Status = Builder.CreateZExt(Status, Int32Ty);
11467 return Status;
11468 }
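// __builtin_arm_rndr/rndrrs return the status flag (zero-extended to i32) and
// write the 64-bit random value through the pointer argument, mirroring the
// two-element result of the underlying rndr/rndrrs intrinsics.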
11469
11470 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11471 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11472 const FunctionDecl *FD = E->getDirectCallee();
11473 Value *Ops[2];
11474 for (unsigned i = 0; i < 2; i++)
11475 Ops[i] = EmitScalarExpr(E->getArg(i));
11476 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11477 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11478 StringRef Name = FD->getName();
11479 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11480 }
11481
11482 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11483 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11484 getContext().getTypeSize(E->getType()) == 128) {
11485 Function *F =
11486 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11487 ? Intrinsic::aarch64_ldaxp
11488 : Intrinsic::aarch64_ldxp);
11489
11490 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11491 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11492
11493 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11494 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11495 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11496 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11497 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11498
11499 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11500 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11501 Val = Builder.CreateOr(Val, Val1);
11502 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11503 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11504 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11505 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11506
11507 QualType Ty = E->getType();
11508 llvm::Type *RealResTy = ConvertType(Ty);
11509 llvm::Type *IntTy =
11510 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11511
11512 Function *F =
11513 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11514 ? Intrinsic::aarch64_ldaxr
11515 : Intrinsic::aarch64_ldxr,
11516 UnqualPtrTy);
11517 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11518 Val->addParamAttr(
11519 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11520
11521 if (RealResTy->isPointerTy())
11522 return Builder.CreateIntToPtr(Val, RealResTy);
11523
11524 llvm::Type *IntResTy = llvm::IntegerType::get(
11525 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11526 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11527 RealResTy);
11528 }
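// In the 128-bit case above, the two 64-bit halves returned by ldxp/ldaxp are
// recombined into an i128 via zext, shift and or, then bitcast to the result
// type. Narrower loads instead go through ldxr/ldaxr with an elementtype
// attribute so the backend knows the accessed width.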
11529
11530 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11531 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11532 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11533 Function *F =
11534 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11535 ? Intrinsic::aarch64_stlxp
11536 : Intrinsic::aarch64_stxp);
11537 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11538
11539 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11540 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11541
11542 Tmp = Tmp.withElementType(STy);
11543 llvm::Value *Val = Builder.CreateLoad(Tmp);
11544
11545 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11546 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11547 Value *StPtr = EmitScalarExpr(E->getArg(1));
11548 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11549 }
11550
11551 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11552 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11553 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11554 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11555
11556 QualType Ty = E->getArg(0)->getType();
11557 llvm::Type *StoreTy =
11558 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11559
11560 if (StoreVal->getType()->isPointerTy())
11561 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11562 else {
11563 llvm::Type *IntTy = llvm::IntegerType::get(
11564 getLLVMContext(),
11565 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11566 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11567 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11568 }
11569
11570 Function *F =
11571 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11572 ? Intrinsic::aarch64_stlxr
11573 : Intrinsic::aarch64_stxr,
11574 StoreAddr->getType());
11575 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11576 CI->addParamAttr(
11577 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11578 return CI;
11579 }
11580
11581 if (BuiltinID == clang::AArch64::BI__getReg) {
11582 Expr::EvalResult Result;
11583 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11584 llvm_unreachable("Sema will ensure that the parameter is constant");
11585
11586 llvm::APSInt Value = Result.Val.getInt();
11587 LLVMContext &Context = CGM.getLLVMContext();
11588 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11589
11590 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11591 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11592 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11593
11594 llvm::Function *F =
11595 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11596 return Builder.CreateCall(F, Metadata);
11597 }
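// E.g. __getReg(29) reads x29 (the frame pointer): the evaluated constant
// selects the register name ("sp" for 31, "x<N>" otherwise), which is passed
// to llvm.read_register as metadata.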
11598
11599 if (BuiltinID == clang::AArch64::BI__break) {
11600 Expr::EvalResult Result;
11601 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11602 llvm_unreachable("Sema will ensure that the parameter is constant");
11603
11604 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11605 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11606 }
11607
11608 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11609 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11610 return Builder.CreateCall(F);
11611 }
11612
11613 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11614 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11615 llvm::SyncScope::SingleThread);
11616
11617 // CRC32
11618 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11619 switch (BuiltinID) {
11620 case clang::AArch64::BI__builtin_arm_crc32b:
11621 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11622 case clang::AArch64::BI__builtin_arm_crc32cb:
11623 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11624 case clang::AArch64::BI__builtin_arm_crc32h:
11625 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11626 case clang::AArch64::BI__builtin_arm_crc32ch:
11627 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11628 case clang::AArch64::BI__builtin_arm_crc32w:
11629 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11630 case clang::AArch64::BI__builtin_arm_crc32cw:
11631 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11632 case clang::AArch64::BI__builtin_arm_crc32d:
11633 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11634 case clang::AArch64::BI__builtin_arm_crc32cd:
11635 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11636 }
11637
11638 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11639 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11640 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11641 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11642
11643 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11644 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11645
11646 return Builder.CreateCall(F, {Arg0, Arg1});
11647 }
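// All CRC32 builtins above share one shape: the i32 accumulator is passed
// through unchanged and the data operand is zero-extended or bitcast to the
// width expected by the selected aarch64.crc32* intrinsic (i32 for b/h/w,
// i64 for d).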
11648
11649 // Memory Operations (MOPS)
11650 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11651 Value *Dst = EmitScalarExpr(E->getArg(0));
11652 Value *Val = EmitScalarExpr(E->getArg(1));
11653 Value *Size = EmitScalarExpr(E->getArg(2));
11654 Val = Builder.CreateTrunc(Val, Int8Ty);
11655 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11656 return Builder.CreateCall(
11657 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11658 }
11659
11660 // Memory Tagging Extensions (MTE) Intrinsics
11661 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11662 switch (BuiltinID) {
11663 case clang::AArch64::BI__builtin_arm_irg:
11664 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11665 case clang::AArch64::BI__builtin_arm_addg:
11666 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11667 case clang::AArch64::BI__builtin_arm_gmi:
11668 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11669 case clang::AArch64::BI__builtin_arm_ldg:
11670 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11671 case clang::AArch64::BI__builtin_arm_stg:
11672 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11673 case clang::AArch64::BI__builtin_arm_subp:
11674 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11675 }
11676
11677 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11678 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11679 Value *Pointer = EmitScalarExpr(E->getArg(0));
11680 Value *Mask = EmitScalarExpr(E->getArg(1));
11681
11682 Mask = Builder.CreateZExt(Mask, Int64Ty);
11683 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11684 {Pointer, Mask});
11685 }
11686 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11687 Value *Pointer = EmitScalarExpr(E->getArg(0));
11688 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11689
11690 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11691 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11692 {Pointer, TagOffset});
11693 }
11694 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11695 Value *Pointer = EmitScalarExpr(E->getArg(0));
11696 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11697
11698 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11699 return Builder.CreateCall(
11700 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11701 }
11702 // Although it is possible to supply a different return
11703 // address (first arg) to this intrinsic, for now we set the
11704 // return address to be the same as the input address.
11705 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11706 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11707 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11708 {TagAddress, TagAddress});
11709 }
11710 // Although it is possible to supply a different tag (to set)
11711 // to this intrinsic (as first arg), for now we supply
11712 // the tag that is in the input address argument (the common use case).
11713 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11714 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11715 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11716 {TagAddress, TagAddress});
11717 }
11718 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11719 Value *PointerA = EmitScalarExpr(E->getArg(0));
11720 Value *PointerB = EmitScalarExpr(E->getArg(1));
11721 return Builder.CreateCall(
11722 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11723 }
11724 }
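// Summary of the MTE lowering above: irg/addg/gmi zero-extend their second
// operand to i64 before the call, ldg/stg reuse the input pointer for both
// intrinsic operands, and subp simply forwards both pointers.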
11725
11726 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11727 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11728 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11729 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11730 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11731 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11732 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11733 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11734
11735 SpecialRegisterAccessKind AccessKind = Write;
11736 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11737 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11738 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11739 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11740 AccessKind = VolatileRead;
11741
11742 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11743 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11744
11745 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11746 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11747
11748 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11749 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11750
11751 llvm::Type *ValueType;
11752 llvm::Type *RegisterType = Int64Ty;
11753 if (Is32Bit) {
11754 ValueType = Int32Ty;
11755 } else if (Is128Bit) {
11756 llvm::Type *Int128Ty =
11757 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11758 ValueType = Int128Ty;
11759 RegisterType = Int128Ty;
11760 } else if (IsPointerBuiltin) {
11761 ValueType = VoidPtrTy;
11762 } else {
11763 ValueType = Int64Ty;
11764 }
11765
11766 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11767 AccessKind);
11768 }
11769
11770 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11771 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11772 LLVMContext &Context = CGM.getLLVMContext();
11773
11774 unsigned SysReg =
11775 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11776
11777 std::string SysRegStr;
11778 llvm::raw_string_ostream(SysRegStr) <<
11779 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11780 ((SysReg >> 11) & 7) << ":" <<
11781 ((SysReg >> 7) & 15) << ":" <<
11782 ((SysReg >> 3) & 15) << ":" <<
11783 ( SysReg & 7);
11784
11785 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11786 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11787 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11788
11789 llvm::Type *RegisterType = Int64Ty;
11790 llvm::Type *Types[] = { RegisterType };
11791
11792 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11793 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11794
11795 return Builder.CreateCall(F, Metadata);
11796 }
11797
11798 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11799 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11800
11801 return Builder.CreateCall(F, { Metadata, ArgValue });
11802 }
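// The string built above is the "op0:op1:CRn:CRm:op2" form expected as
// read_register/write_register metadata: op0 is 2 or'd with bit 14, op1 comes
// from bits 13:11, CRn from bits 10:7, CRm from bits 6:3 and op2 from bits
// 2:0 of the MSVC system-register constant.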
11803
11804 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11805 llvm::Function *F =
11806 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11807 return Builder.CreateCall(F);
11808 }
11809
11810 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11811 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11812 return Builder.CreateCall(F);
11813 }
11814
11815 if (BuiltinID == clang::AArch64::BI__mulh ||
11816 BuiltinID == clang::AArch64::BI__umulh) {
11817 llvm::Type *ResType = ConvertType(E->getType());
11818 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11819
11820 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11821 Value *LHS =
11822 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11823 Value *RHS =
11824 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11825
11826 Value *MulResult, *HigherBits;
11827 if (IsSigned) {
11828 MulResult = Builder.CreateNSWMul(LHS, RHS);
11829 HigherBits = Builder.CreateAShr(MulResult, 64);
11830 } else {
11831 MulResult = Builder.CreateNUWMul(LHS, RHS);
11832 HigherBits = Builder.CreateLShr(MulResult, 64);
11833 }
11834 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11835
11836 return HigherBits;
11837 }
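// E.g. __umulh(a, b) zero-extends both operands to i128, multiplies, shifts
// the product right by 64 and truncates, yielding the high half of the full
// 64x64->128 product; __mulh does the same with sign extension and an
// arithmetic shift.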
11838
11839 if (BuiltinID == AArch64::BI__writex18byte ||
11840 BuiltinID == AArch64::BI__writex18word ||
11841 BuiltinID == AArch64::BI__writex18dword ||
11842 BuiltinID == AArch64::BI__writex18qword) {
11843 // Process the args first
11844 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11845 Value *DataArg = EmitScalarExpr(E->getArg(1));
11846
11847 // Read x18 as i8*
11848 llvm::Value *X18 = readX18AsPtr(*this);
11849
11850 // Store val at x18 + offset
11851 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11852 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11853 StoreInst *Store =
11854 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11855 return Store;
11856 }
11857
11858 if (BuiltinID == AArch64::BI__readx18byte ||
11859 BuiltinID == AArch64::BI__readx18word ||
11860 BuiltinID == AArch64::BI__readx18dword ||
11861 BuiltinID == AArch64::BI__readx18qword) {
11862 // Process the args first
11863 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11864
11865 // Read x18 as i8*
11866 llvm::Value *X18 = readX18AsPtr(*this);
11867
11868 // Load x18 + offset
11869 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11870 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11871 llvm::Type *IntTy = ConvertType(E->getType());
11872 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11873 return Load;
11874 }
11875
11876 if (BuiltinID == AArch64::BI__addx18byte ||
11877 BuiltinID == AArch64::BI__addx18word ||
11878 BuiltinID == AArch64::BI__addx18dword ||
11879 BuiltinID == AArch64::BI__addx18qword ||
11880 BuiltinID == AArch64::BI__incx18byte ||
11881 BuiltinID == AArch64::BI__incx18word ||
11882 BuiltinID == AArch64::BI__incx18dword ||
11883 BuiltinID == AArch64::BI__incx18qword) {
11884 llvm::Type *IntTy;
11885 bool isIncrement;
11886 switch (BuiltinID) {
11887 case AArch64::BI__incx18byte:
11888 IntTy = Int8Ty;
11889 isIncrement = true;
11890 break;
11891 case AArch64::BI__incx18word:
11892 IntTy = Int16Ty;
11893 isIncrement = true;
11894 break;
11895 case AArch64::BI__incx18dword:
11896 IntTy = Int32Ty;
11897 isIncrement = true;
11898 break;
11899 case AArch64::BI__incx18qword:
11900 IntTy = Int64Ty;
11901 isIncrement = true;
11902 break;
11903 default:
11904 IntTy = ConvertType(E->getArg(1)->getType());
11905 isIncrement = false;
11906 break;
11907 }
11908 // Process the args first
11909 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11910 Value *ValToAdd =
11911 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11912
11913 // Read x18 as i8*
11914 llvm::Value *X18 = readX18AsPtr(*this);
11915
11916 // Load x18 + offset
11917 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11918 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11919 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11920
11921 // Add values
11922 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
11923
11924 // Store val at x18 + offset
11925 StoreInst *Store =
11926 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
11927 return Store;
11928 }
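// The three x18 helper families above share one pattern: form the byte
// address x18 + offset, then store (__writex18*), load (__readx18*) or
// read-modify-write (__addx18*/__incx18*) a value of the builtin's width at
// byte alignment.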
11929
11930 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11931 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11932 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11933 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11934 Value *Arg = EmitScalarExpr(E->getArg(0));
11935 llvm::Type *RetTy = ConvertType(E->getType());
11936 return Builder.CreateBitCast(Arg, RetTy);
11937 }
11938
11939 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11940 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11941 BuiltinID == AArch64::BI_CountLeadingZeros ||
11942 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11943 Value *Arg = EmitScalarExpr(E->getArg(0));
11944 llvm::Type *ArgType = Arg->getType();
11945
11946 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11947 BuiltinID == AArch64::BI_CountLeadingOnes64)
11948 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11949
11950 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11951 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11952
11953 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11954 BuiltinID == AArch64::BI_CountLeadingZeros64)
11955 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11956 return Result;
11957 }
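// _CountLeadingOnes* is lowered as ctlz of the bitwise-not (xor with all
// ones) of the argument; the 64-bit variants truncate the count back to the
// i32 return type expected by these MSVC-style intrinsics.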
11958
11959 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11960 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11961 Value *Arg = EmitScalarExpr(E->getArg(0));
11962
11963 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11964 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11965 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11966
11967 Value *Result = Builder.CreateCall(F, Arg, "cls");
11968 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11969 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11970 return Result;
11971 }
11972
11973 if (BuiltinID == AArch64::BI_CountOneBits ||
11974 BuiltinID == AArch64::BI_CountOneBits64) {
11975 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11976 llvm::Type *ArgType = ArgValue->getType();
11977 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11978
11979 Value *Result = Builder.CreateCall(F, ArgValue);
11980 if (BuiltinID == AArch64::BI_CountOneBits64)
11981 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11982 return Result;
11983 }
11984
11985 if (BuiltinID == AArch64::BI__prefetch) {
11986 Value *Address = EmitScalarExpr(E->getArg(0));
11987 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11988 Value *Locality = ConstantInt::get(Int32Ty, 3);
11989 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11990 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11991 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11992 }
11993
11994 if (BuiltinID == AArch64::BI__hlt) {
11995 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11996 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11997
11998 // Return 0 for convenience, even though MSVC returns some other undefined
11999 // value.
12000 return ConstantInt::get(Builder.getInt32Ty(), 0);
12001 }
12002
12003 if (BuiltinID == NEON::BI__builtin_neon_vcvth_bf16_f32)
12004 return Builder.CreateFPTrunc(
12005 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
12006 Builder.getFloatTy()),
12007 Builder.getBFloatTy());
12008
12009 // Handle MSVC intrinsics before argument evaluation to prevent double
12010 // evaluation.
12011 if (std::optional<MSVCIntrin> MsvcIntId =
12012 translateAarch64ToMsvcIntrin(BuiltinID))
12013 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12014
12015 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
12016 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
12017 return P.first == BuiltinID;
12018 });
12019 if (It != end(NEONEquivalentIntrinsicMap))
12020 BuiltinID = It->second;
12021
12022 // Find out if any arguments are required to be integer constant
12023 // expressions.
12024 unsigned ICEArguments = 0;
12025 ASTContext::GetBuiltinTypeError Error;
12026 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12027 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12028
12029 llvm::SmallVector<Value*, 4> Ops;
12030 Address PtrOp0 = Address::invalid();
12031 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
12032 if (i == 0) {
12033 switch (BuiltinID) {
12034 case NEON::BI__builtin_neon_vld1_v:
12035 case NEON::BI__builtin_neon_vld1q_v:
12036 case NEON::BI__builtin_neon_vld1_dup_v:
12037 case NEON::BI__builtin_neon_vld1q_dup_v:
12038 case NEON::BI__builtin_neon_vld1_lane_v:
12039 case NEON::BI__builtin_neon_vld1q_lane_v:
12040 case NEON::BI__builtin_neon_vst1_v:
12041 case NEON::BI__builtin_neon_vst1q_v:
12042 case NEON::BI__builtin_neon_vst1_lane_v:
12043 case NEON::BI__builtin_neon_vst1q_lane_v:
12044 case NEON::BI__builtin_neon_vldap1_lane_s64:
12045 case NEON::BI__builtin_neon_vldap1q_lane_s64:
12046 case NEON::BI__builtin_neon_vstl1_lane_s64:
12047 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12048 // Get the alignment for the argument in addition to the value;
12049 // we'll use it later.
12050 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
12051 Ops.push_back(PtrOp0.emitRawPointer(*this));
12052 continue;
12053 }
12054 }
12055 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
12056 }
12057
12058 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12059 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12060 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12061
12062 if (Builtin) {
12063 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12064 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12065 assert(Result && "SISD intrinsic should have been handled");
12066 return Result;
12067 }
12068
12069 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12070 NeonTypeFlags Type(0);
12071 if (std::optional<llvm::APSInt> Result =
12072 Arg->getIntegerConstantExpr(getContext()))
12073 // Determine the type of this overloaded NEON intrinsic.
12074 Type = NeonTypeFlags(Result->getZExtValue());
12075
12076 bool usgn = Type.isUnsigned();
12077 bool quad = Type.isQuad();
12078
12079 // Handle non-overloaded intrinsics first.
12080 switch (BuiltinID) {
12081 default: break;
12082 case NEON::BI__builtin_neon_vabsh_f16:
12083 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12084 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12085 case NEON::BI__builtin_neon_vaddq_p128: {
12086 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12087 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12088 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12089 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12090 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12091 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12092 return Builder.CreateBitCast(Ops[0], Int128Ty);
12093 }
12094 case NEON::BI__builtin_neon_vldrq_p128: {
12095 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12096 Value *Ptr = EmitScalarExpr(E->getArg(0));
12097 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12098 CharUnits::One());
12099 }
12100 case NEON::BI__builtin_neon_vstrq_p128: {
12101 Value *Ptr = Ops[0];
12102 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12103 }
12104 case NEON::BI__builtin_neon_vcvts_f32_u32:
12105 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12106 usgn = true;
12107 [[fallthrough]];
12108 case NEON::BI__builtin_neon_vcvts_f32_s32:
12109 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12110 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12111 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12112 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12113 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12114 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12115 if (usgn)
12116 return Builder.CreateUIToFP(Ops[0], FTy);
12117 return Builder.CreateSIToFP(Ops[0], FTy);
12118 }
12119 case NEON::BI__builtin_neon_vcvth_f16_u16:
12120 case NEON::BI__builtin_neon_vcvth_f16_u32:
12121 case NEON::BI__builtin_neon_vcvth_f16_u64:
12122 usgn = true;
12123 [[fallthrough]];
12124 case NEON::BI__builtin_neon_vcvth_f16_s16:
12125 case NEON::BI__builtin_neon_vcvth_f16_s32:
12126 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12127 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12128 llvm::Type *FTy = HalfTy;
12129 llvm::Type *InTy;
12130 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12131 InTy = Int64Ty;
12132 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12133 InTy = Int32Ty;
12134 else
12135 InTy = Int16Ty;
12136 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12137 if (usgn)
12138 return Builder.CreateUIToFP(Ops[0], FTy);
12139 return Builder.CreateSIToFP(Ops[0], FTy);
12140 }
12141 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12142 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12143 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12144 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12145 case NEON::BI__builtin_neon_vcvth_u16_f16:
12146 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12147 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12148 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12149 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12150 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12151 unsigned Int;
12152 llvm::Type* InTy = Int32Ty;
12153 llvm::Type* FTy = HalfTy;
12154 llvm::Type *Tys[2] = {InTy, FTy};
12155 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12156 switch (BuiltinID) {
12157 default: llvm_unreachable("missing builtin ID in switch!");
12158 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12159 Int = Intrinsic::aarch64_neon_fcvtau; break;
12160 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12161 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12162 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12163 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12164 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12165 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12166 case NEON::BI__builtin_neon_vcvth_u16_f16:
12167 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12168 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12169 Int = Intrinsic::aarch64_neon_fcvtas; break;
12170 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12171 Int = Intrinsic::aarch64_neon_fcvtms; break;
12172 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12173 Int = Intrinsic::aarch64_neon_fcvtns; break;
12174 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12175 Int = Intrinsic::aarch64_neon_fcvtps; break;
12176 case NEON::BI__builtin_neon_vcvth_s16_f16:
12177 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12178 }
12179 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12180 return Builder.CreateTrunc(Ops[0], Int16Ty);
12181 }
12182 case NEON::BI__builtin_neon_vcaleh_f16:
12183 case NEON::BI__builtin_neon_vcalth_f16:
12184 case NEON::BI__builtin_neon_vcageh_f16:
12185 case NEON::BI__builtin_neon_vcagth_f16: {
12186 unsigned Int;
12187 llvm::Type* InTy = Int32Ty;
12188 llvm::Type* FTy = HalfTy;
12189 llvm::Type *Tys[2] = {InTy, FTy};
12190 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12191 switch (BuiltinID) {
12192 default: llvm_unreachable("missing builtin ID in switch!");
12193 case NEON::BI__builtin_neon_vcageh_f16:
12194 Int = Intrinsic::aarch64_neon_facge; break;
12195 case NEON::BI__builtin_neon_vcagth_f16:
12196 Int = Intrinsic::aarch64_neon_facgt; break;
12197 case NEON::BI__builtin_neon_vcaleh_f16:
12198 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12199 case NEON::BI__builtin_neon_vcalth_f16:
12200 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12201 }
12202 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12203 return Builder.CreateTrunc(Ops[0], Int16Ty);
12204 }
12205 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12206 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12207 unsigned Int;
12208 llvm::Type* InTy = Int32Ty;
12209 llvm::Type* FTy = HalfTy;
12210 llvm::Type *Tys[2] = {InTy, FTy};
12211 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12212 switch (BuiltinID) {
12213 default: llvm_unreachable("missing builtin ID in switch!");
12214 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12215 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12216 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12217 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12218 }
12219 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12220 return Builder.CreateTrunc(Ops[0], Int16Ty);
12221 }
12222 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12223 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12224 unsigned Int;
12225 llvm::Type* FTy = HalfTy;
12226 llvm::Type* InTy = Int32Ty;
12227 llvm::Type *Tys[2] = {FTy, InTy};
12228 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12229 switch (BuiltinID) {
12230 default: llvm_unreachable("missing builtin ID in switch!");
12231 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12232 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12233 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12234 break;
12235 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12236 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12237 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12238 break;
12239 }
12240 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12241 }
12242 case NEON::BI__builtin_neon_vpaddd_s64: {
12243 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12244 Value *Vec = EmitScalarExpr(E->getArg(0));
12245 // The vector is v2i64, so make sure it's bitcast to that.
12246 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12247 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12248 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12249 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12250 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12251 // Pairwise addition of a v2i64 into a scalar i64.
12252 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12253 }
12254 case NEON::BI__builtin_neon_vpaddd_f64: {
12255 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12256 Value *Vec = EmitScalarExpr(E->getArg(0));
12257 // The vector is v2f64, so make sure it's bitcast to that.
12258 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12259 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12260 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12261 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12262 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12263 // Pairwise addition of a v2f64 into a scalar f64.
12264 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12265 }
12266 case NEON::BI__builtin_neon_vpadds_f32: {
12267 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12268 Value *Vec = EmitScalarExpr(E->getArg(0));
12269 // The vector is v2f32, so make sure it's bitcast to that.
12270 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12271 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12272 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12273 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12274 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12275 // Pairwise addition of a v2f32 into a scalar f32.
12276 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12277 }
12278 case NEON::BI__builtin_neon_vceqzd_s64:
12279 case NEON::BI__builtin_neon_vceqzd_f64:
12280 case NEON::BI__builtin_neon_vceqzs_f32:
12281 case NEON::BI__builtin_neon_vceqzh_f16:
12282 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12283 return EmitAArch64CompareBuiltinExpr(
12284 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12285 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12286 case NEON::BI__builtin_neon_vcgezd_s64:
12287 case NEON::BI__builtin_neon_vcgezd_f64:
12288 case NEON::BI__builtin_neon_vcgezs_f32:
12289 case NEON::BI__builtin_neon_vcgezh_f16:
12290 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12291 return EmitAArch64CompareBuiltinExpr(
12292 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12293 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12294 case NEON::BI__builtin_neon_vclezd_s64:
12295 case NEON::BI__builtin_neon_vclezd_f64:
12296 case NEON::BI__builtin_neon_vclezs_f32:
12297 case NEON::BI__builtin_neon_vclezh_f16:
12298 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12299 return EmitAArch64CompareBuiltinExpr(
12300 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12301 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12302 case NEON::BI__builtin_neon_vcgtzd_s64:
12303 case NEON::BI__builtin_neon_vcgtzd_f64:
12304 case NEON::BI__builtin_neon_vcgtzs_f32:
12305 case NEON::BI__builtin_neon_vcgtzh_f16:
12306 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12307 return EmitAArch64CompareBuiltinExpr(
12308 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12309 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12310 case NEON::BI__builtin_neon_vcltzd_s64:
12311 case NEON::BI__builtin_neon_vcltzd_f64:
12312 case NEON::BI__builtin_neon_vcltzs_f32:
12313 case NEON::BI__builtin_neon_vcltzh_f16:
12314 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12315 return EmitAArch64CompareBuiltinExpr(
12316 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12317 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12318
12319 case NEON::BI__builtin_neon_vceqzd_u64: {
12320 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12321 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12322 Ops[0] =
12323 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12324 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12325 }
12326 case NEON::BI__builtin_neon_vceqd_f64:
12327 case NEON::BI__builtin_neon_vcled_f64:
12328 case NEON::BI__builtin_neon_vcltd_f64:
12329 case NEON::BI__builtin_neon_vcged_f64:
12330 case NEON::BI__builtin_neon_vcgtd_f64: {
12331 llvm::CmpInst::Predicate P;
12332 switch (BuiltinID) {
12333 default: llvm_unreachable("missing builtin ID in switch!");
12334 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12335 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12336 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12337 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12338 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12339 }
12340 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12341 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12342 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12343 if (P == llvm::FCmpInst::FCMP_OEQ)
12344 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12345 else
12346 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12347 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12348 }
12349 case NEON::BI__builtin_neon_vceqs_f32:
12350 case NEON::BI__builtin_neon_vcles_f32:
12351 case NEON::BI__builtin_neon_vclts_f32:
12352 case NEON::BI__builtin_neon_vcges_f32:
12353 case NEON::BI__builtin_neon_vcgts_f32: {
12354 llvm::CmpInst::Predicate P;
12355 switch (BuiltinID) {
12356 default: llvm_unreachable("missing builtin ID in switch!");
12357 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12358 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12359 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12360 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12361 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12362 }
12363 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12364 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12365 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12366 if (P == llvm::FCmpInst::FCMP_OEQ)
12367 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12368 else
12369 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12370 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12371 }
12372 case NEON::BI__builtin_neon_vceqh_f16:
12373 case NEON::BI__builtin_neon_vcleh_f16:
12374 case NEON::BI__builtin_neon_vclth_f16:
12375 case NEON::BI__builtin_neon_vcgeh_f16:
12376 case NEON::BI__builtin_neon_vcgth_f16: {
12377 llvm::CmpInst::Predicate P;
12378 switch (BuiltinID) {
12379 default: llvm_unreachable("missing builtin ID in switch!");
12380 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12381 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12382 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12383 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12384 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12385 }
12386 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12387 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12388 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12389 if (P == llvm::FCmpInst::FCMP_OEQ)
12390 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12391 else
12392 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12393 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12394 }
12395 case NEON::BI__builtin_neon_vceqd_s64:
12396 case NEON::BI__builtin_neon_vceqd_u64:
12397 case NEON::BI__builtin_neon_vcgtd_s64:
12398 case NEON::BI__builtin_neon_vcgtd_u64:
12399 case NEON::BI__builtin_neon_vcltd_s64:
12400 case NEON::BI__builtin_neon_vcltd_u64:
12401 case NEON::BI__builtin_neon_vcged_u64:
12402 case NEON::BI__builtin_neon_vcged_s64:
12403 case NEON::BI__builtin_neon_vcled_u64:
12404 case NEON::BI__builtin_neon_vcled_s64: {
12405 llvm::CmpInst::Predicate P;
12406 switch (BuiltinID) {
12407 default: llvm_unreachable("missing builtin ID in switch!");
12408 case NEON::BI__builtin_neon_vceqd_s64:
12409 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12410 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12411 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12412 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12413 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12414 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12415 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12416 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12417 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12418 }
12419 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12420 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12421 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12422 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12423 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12424 }
12425 case NEON::BI__builtin_neon_vtstd_s64:
12426 case NEON::BI__builtin_neon_vtstd_u64: {
12427 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12428 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12429 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12430 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12431 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12432 llvm::Constant::getNullValue(Int64Ty));
12433 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12434 }
12435 case NEON::BI__builtin_neon_vset_lane_i8:
12436 case NEON::BI__builtin_neon_vset_lane_i16:
12437 case NEON::BI__builtin_neon_vset_lane_i32:
12438 case NEON::BI__builtin_neon_vset_lane_i64:
12439 case NEON::BI__builtin_neon_vset_lane_bf16:
12440 case NEON::BI__builtin_neon_vset_lane_f32:
12441 case NEON::BI__builtin_neon_vsetq_lane_i8:
12442 case NEON::BI__builtin_neon_vsetq_lane_i16:
12443 case NEON::BI__builtin_neon_vsetq_lane_i32:
12444 case NEON::BI__builtin_neon_vsetq_lane_i64:
12445 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12446 case NEON::BI__builtin_neon_vsetq_lane_f32:
12447 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12448 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12449 case NEON::BI__builtin_neon_vset_lane_f64:
12450 // The vector type needs a cast for the v1f64 variant.
12451 Ops[1] =
12452 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12453 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12454 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12455 case NEON::BI__builtin_neon_vsetq_lane_f64:
12456 // The vector type needs a cast for the v2f64 variant.
12457 Ops[1] =
12458 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12459 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12460 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12461
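// Note: the vset_lane/vget_lane style builtins in this region map directly
// onto IR vector element operations rather than target intrinsics: set_lane
// is an insertelement into the (possibly bitcast) vector, and
// get_lane/dup_lane is a bitcast to the matching fixed vector type followed
// by an extractelement. A rough sketch for vsetq_lane_f64(x, v, 1):
//   %res = insertelement <2 x double> %v, double %x, i64 1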
12462 case NEON::BI__builtin_neon_vget_lane_i8:
12463 case NEON::BI__builtin_neon_vdupb_lane_i8:
12464 Ops[0] =
12465 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12466 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12467 "vget_lane");
12468 case NEON::BI__builtin_neon_vgetq_lane_i8:
12469 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12470 Ops[0] =
12471 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12472 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12473 "vgetq_lane");
12474 case NEON::BI__builtin_neon_vget_lane_i16:
12475 case NEON::BI__builtin_neon_vduph_lane_i16:
12476 Ops[0] =
12477 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12478 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12479 "vget_lane");
12480 case NEON::BI__builtin_neon_vgetq_lane_i16:
12481 case NEON::BI__builtin_neon_vduph_laneq_i16:
12482 Ops[0] =
12483 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12484 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12485 "vgetq_lane");
12486 case NEON::BI__builtin_neon_vget_lane_i32:
12487 case NEON::BI__builtin_neon_vdups_lane_i32:
12488 Ops[0] =
12489 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12490 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12491 "vget_lane");
12492 case NEON::BI__builtin_neon_vdups_lane_f32:
12493 Ops[0] =
12494 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12495 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12496 "vdups_lane");
12497 case NEON::BI__builtin_neon_vgetq_lane_i32:
12498 case NEON::BI__builtin_neon_vdups_laneq_i32:
12499 Ops[0] =
12500 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12501 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12502 "vgetq_lane");
12503 case NEON::BI__builtin_neon_vget_lane_i64:
12504 case NEON::BI__builtin_neon_vdupd_lane_i64:
12505 Ops[0] =
12506 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12507 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12508 "vget_lane");
12509 case NEON::BI__builtin_neon_vdupd_lane_f64:
12510 Ops[0] =
12511 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12512 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12513 "vdupd_lane");
12514 case NEON::BI__builtin_neon_vgetq_lane_i64:
12515 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12516 Ops[0] =
12517 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12518 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12519 "vgetq_lane");
12520 case NEON::BI__builtin_neon_vget_lane_f32:
12521 Ops[0] =
12522 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12523 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12524 "vget_lane");
12525 case NEON::BI__builtin_neon_vget_lane_f64:
12526 Ops[0] =
12527 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12528 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12529 "vget_lane");
12530 case NEON::BI__builtin_neon_vgetq_lane_f32:
12531 case NEON::BI__builtin_neon_vdups_laneq_f32:
12532 Ops[0] =
12533 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12534 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12535 "vgetq_lane");
12536 case NEON::BI__builtin_neon_vgetq_lane_f64:
12537 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12538 Ops[0] =
12539 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12540 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12541 "vgetq_lane");
12542 case NEON::BI__builtin_neon_vaddh_f16:
12543 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12544 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12545 case NEON::BI__builtin_neon_vsubh_f16:
12546 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12547 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12548 case NEON::BI__builtin_neon_vmulh_f16:
12549 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12550 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12551 case NEON::BI__builtin_neon_vdivh_f16:
12552 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12553 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12554 case NEON::BI__builtin_neon_vfmah_f16:
12555 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12556 return emitCallMaybeConstrainedFPBuiltin(
12557 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12558 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12559 case NEON::BI__builtin_neon_vfmsh_f16: {
12560 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12561
12562 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12563 return emitCallMaybeConstrainedFPBuiltin(
12564 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12565 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12566 }
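// Note: vfmah/vfmsh both reuse llvm.fma (or its constrained variant); the
// fused multiply-subtract simply negates the first multiplicand up front,
// i.e. vfmsh(a, b, c) is emitted as fma(-b, c, a), with the accumulator moved
// to the last fma operand as the comment above says.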
12567 case NEON::BI__builtin_neon_vaddd_s64:
12568 case NEON::BI__builtin_neon_vaddd_u64:
12569 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12570 case NEON::BI__builtin_neon_vsubd_s64:
12571 case NEON::BI__builtin_neon_vsubd_u64:
12572 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12573 case NEON::BI__builtin_neon_vqdmlalh_s16:
12574 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12575 SmallVector<Value *, 2> ProductOps;
12576 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12577 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12578 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12579 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12580 ProductOps, "vqdmlXl");
12581 Constant *CI = ConstantInt::get(SizeTy, 0);
12582 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12583
12584 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12585 ? Intrinsic::aarch64_neon_sqadd
12586 : Intrinsic::aarch64_neon_sqsub;
12587 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12588 }
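// Note: there is no scalar i16 saturating-doubling-multiply intrinsic, so the
// scalar operands are widened into <4 x i16> vectors via vectorWrapScalar16,
// multiplied with aarch64.neon.sqdmull, and lane 0 of the <4 x i32> product
// is extracted before the saturating accumulate (sqadd/sqsub) on i32.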
12589 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12590 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12591 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12592 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12593 Ops, "vqshlu_n");
12594 }
12595 case NEON::BI__builtin_neon_vqshld_n_u64:
12596 case NEON::BI__builtin_neon_vqshld_n_s64: {
12597 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12598 ? Intrinsic::aarch64_neon_uqshl
12599 : Intrinsic::aarch64_neon_sqshl;
12600 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12601 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12602 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12603 }
12604 case NEON::BI__builtin_neon_vrshrd_n_u64:
12605 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12606 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12607 ? Intrinsic::aarch64_neon_urshl
12608 : Intrinsic::aarch64_neon_srshl;
12609 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12610 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12611 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12612 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12613 }
12614 case NEON::BI__builtin_neon_vrsrad_n_u64:
12615 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12616 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12617 ? Intrinsic::aarch64_neon_urshl
12618 : Intrinsic::aarch64_neon_srshl;
12619 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12620 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12621 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12622 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12623 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12624 }
12625 case NEON::BI__builtin_neon_vshld_n_s64:
12626 case NEON::BI__builtin_neon_vshld_n_u64: {
12627 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12628 return Builder.CreateShl(
12629 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12630 }
12631 case NEON::BI__builtin_neon_vshrd_n_s64: {
12632 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12633 return Builder.CreateAShr(
12634 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12635 Amt->getZExtValue())),
12636 "shrd_n");
12637 }
12638 case NEON::BI__builtin_neon_vshrd_n_u64: {
12639 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12640 uint64_t ShiftAmt = Amt->getZExtValue();
12641 // Right-shifting an unsigned value by its size yields 0.
12642 if (ShiftAmt == 64)
12643 return ConstantInt::get(Int64Ty, 0);
12644 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12645 "shrd_n");
12646 }
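// Note: the scalar shift-immediate cases above fold the edge cases directly:
// an arithmetic shift amount is clamped to 63 (a shift of an i64 by 64 would
// be poison in IR, while the instruction keeps the sign bits), and a logical
// right shift by the full width is folded to the constant 0.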
12647 case NEON::BI__builtin_neon_vsrad_n_s64: {
12648 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12649 Ops[1] = Builder.CreateAShr(
12650 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12651 Amt->getZExtValue())),
12652 "shrd_n");
12653 return Builder.CreateAdd(Ops[0], Ops[1]);
12654 }
12655 case NEON::BI__builtin_neon_vsrad_n_u64: {
12656 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12657 uint64_t ShiftAmt = Amt->getZExtValue();
12658 // Right-shifting an unsigned value by its size yields 0.
12659 // As Op + 0 = Op, return Ops[0] directly.
12660 if (ShiftAmt == 64)
12661 return Ops[0];
12662 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12663 "shrd_n");
12664 return Builder.CreateAdd(Ops[0], Ops[1]);
12665 }
12666 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12667 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12668 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12669 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12670 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12671 "lane");
12672 SmallVector<Value *, 2> ProductOps;
12673 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12674 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12675 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12676 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12677 ProductOps, "vqdmlXl");
12678 Constant *CI = ConstantInt::get(SizeTy, 0);
12679 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12680 Ops.pop_back();
12681
12682 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12683 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12684 ? Intrinsic::aarch64_neon_sqadd
12685 : Intrinsic::aarch64_neon_sqsub;
12686 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12687 }
12688 case NEON::BI__builtin_neon_vqdmlals_s32:
12689 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12690 SmallVector<Value *, 2> ProductOps;
12691 ProductOps.push_back(Ops[1]);
12692 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12693 Ops[1] =
12694 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12695 ProductOps, "vqdmlXl");
12696
12697 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12698 ? Intrinsic::aarch64_neon_sqadd
12699 : Intrinsic::aarch64_neon_sqsub;
12700 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12701 }
12702 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12703 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12704 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12705 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12706 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12707 "lane");
12708 SmallVector<Value *, 2> ProductOps;
12709 ProductOps.push_back(Ops[1]);
12710 ProductOps.push_back(Ops[2]);
12711 Ops[1] =
12712 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12713 ProductOps, "vqdmlXl");
12714 Ops.pop_back();
12715
12716 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12717 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12718 ? Intrinsic::aarch64_neon_sqadd
12719 : Intrinsic::aarch64_neon_sqsub;
12720 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12721 }
12722 case NEON::BI__builtin_neon_vget_lane_bf16:
12723 case NEON::BI__builtin_neon_vduph_lane_bf16:
12724 case NEON::BI__builtin_neon_vduph_lane_f16: {
12725 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12726 "vget_lane");
12727 }
12728 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12729 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12730 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12731 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12732 "vgetq_lane");
12733 }
12734 case NEON::BI__builtin_neon_vcvt_bf16_f32: {
12735 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12736 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12737 return Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12738 }
12739 case NEON::BI__builtin_neon_vcvtq_low_bf16_f32: {
12740 SmallVector<int, 16> ConcatMask(8);
12741 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12742 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12743 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12744 llvm::Value *Trunc =
12745 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[0], V4F32), V4BF16);
12746 return Builder.CreateShuffleVector(
12747 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
12748 }
12749 case NEON::BI__builtin_neon_vcvtq_high_bf16_f32: {
12750 SmallVector<int, 16> ConcatMask(8);
12751 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
12752 SmallVector<int, 16> LoMask(4);
12753 std::iota(LoMask.begin(), LoMask.end(), 0);
12754 llvm::Type *V4F32 = FixedVectorType::get(Builder.getFloatTy(), 4);
12755 llvm::Type *V4BF16 = FixedVectorType::get(Builder.getBFloatTy(), 4);
12756 llvm::Type *V8BF16 = FixedVectorType::get(Builder.getBFloatTy(), 8);
12757 llvm::Value *Inactive = Builder.CreateShuffleVector(
12758 Builder.CreateBitCast(Ops[0], V8BF16), LoMask);
12759 llvm::Value *Trunc =
12760 Builder.CreateFPTrunc(Builder.CreateBitCast(Ops[1], V4F32), V4BF16);
12761 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
12762 }
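// Note: the bf16 narrowing conversions are open coded with fptrunc plus
// shufflevector: vcvt_bf16_f32 is a plain <4 x float> -> <4 x bfloat>
// fptrunc, the "low" form widens that result to 8 lanes with a zero upper
// half, and the "high" form keeps the low half of the existing vector and
// concatenates the newly truncated lanes on top.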
12763
12764 case clang::AArch64::BI_InterlockedAdd:
12765 case clang::AArch64::BI_InterlockedAdd64: {
12766 Address DestAddr = CheckAtomicAlignment(*this, E);
12767 Value *Val = EmitScalarExpr(E->getArg(1));
12768 AtomicRMWInst *RMWI =
12769 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12770 llvm::AtomicOrdering::SequentiallyConsistent);
12771 return Builder.CreateAdd(RMWI, Val);
12772 }
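// Note: atomicrmw add returns the value that was in memory before the
// operation, whereas the MSVC-style _InterlockedAdd is defined to return the
// new value, hence the extra CreateAdd on the loaded result.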
12773 }
12774
12775 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12776 llvm::Type *Ty = VTy;
12777 if (!Ty)
12778 return nullptr;
12779
12780 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12781 // defer to common code if it's been added to our special map.
12782 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12783 AArch64SIMDIntrinsicsProvenSorted);
12784
12785 if (Builtin)
12786 return EmitCommonNeonBuiltinExpr(
12787 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12788 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12789 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12790
12791 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12792 return V;
12793
12794 unsigned Int;
12795 bool ExtractLow = false;
12796 bool ExtendLaneArg = false;
12797 switch (BuiltinID) {
12798 default: return nullptr;
12799 case NEON::BI__builtin_neon_vbsl_v:
12800 case NEON::BI__builtin_neon_vbslq_v: {
12801 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12802 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12803 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12804 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12805
12806 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12807 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12808 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12809 return Builder.CreateBitCast(Ops[0], Ty);
12810 }
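// Note: vbsl is open coded as a bitwise select on the integer view of the
// operands, i.e. result = (sel & a) | (~sel & b), and the result is cast back
// to the original vector type.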
12811 case NEON::BI__builtin_neon_vfma_lane_v:
12812 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12813 // The ARM builtins (and instructions) have the addend as the first
12814 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12815 Value *Addend = Ops[0];
12816 Value *Multiplicand = Ops[1];
12817 Value *LaneSource = Ops[2];
12818 Ops[0] = Multiplicand;
12819 Ops[1] = LaneSource;
12820 Ops[2] = Addend;
12821
12822 // Now adjust things to handle the lane access.
12823 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12824 ? llvm::FixedVectorType::get(VTy->getElementType(),
12825 VTy->getNumElements() / 2)
12826 : VTy;
12827 llvm::Constant *cst = cast<Constant>(Ops[3]);
12828 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12829 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12830 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12831
12832 Ops.pop_back();
12833 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12834 : Intrinsic::fma;
12835 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12836 }
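// Note: for the lane forms handled above the addend is rotated to the last
// operand, since llvm.fma takes the accumulator last, and the selected lane
// is broadcast with a constant splat shuffle mask; for vfmaq_lane the lane
// source is first bitcast to a vector with half as many elements because the
// _lane variants take a 64-bit lane-source vector.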
12837 case NEON::BI__builtin_neon_vfma_laneq_v: {
12838 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12839 // v1f64 fma should be mapped to Neon scalar f64 fma
12840 if (VTy && VTy->getElementType() == DoubleTy) {
12841 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12842 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12843 llvm::FixedVectorType *VTy =
12844 llvm::FixedVectorType::get(DoubleTy, 1);
12845 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12846 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12847 Value *Result;
12848 Result = emitCallMaybeConstrainedFPBuiltin(
12849 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12850 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12851 return Builder.CreateBitCast(Result, Ty);
12852 }
12853 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12854 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12855
12856 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12857 VTy->getNumElements() * 2);
12858 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12859 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12860 cast<ConstantInt>(Ops[3]));
12861 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12862
12863 return emitCallMaybeConstrainedFPBuiltin(
12864 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12865 {Ops[2], Ops[1], Ops[0]});
12866 }
12867 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12868 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12869 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12870
12871 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12872 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12873 return emitCallMaybeConstrainedFPBuiltin(
12874 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12875 {Ops[2], Ops[1], Ops[0]});
12876 }
12877 case NEON::BI__builtin_neon_vfmah_lane_f16:
12878 case NEON::BI__builtin_neon_vfmas_lane_f32:
12879 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12880 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12881 case NEON::BI__builtin_neon_vfmad_lane_f64:
12882 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12883 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12884 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12885 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12886 return emitCallMaybeConstrainedFPBuiltin(
12887 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12888 {Ops[1], Ops[2], Ops[0]});
12889 }
12890 case NEON::BI__builtin_neon_vmull_v:
12891 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12892 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12893 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12894 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12895 case NEON::BI__builtin_neon_vmax_v:
12896 case NEON::BI__builtin_neon_vmaxq_v:
12897 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12898 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12899 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12900 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12901 case NEON::BI__builtin_neon_vmaxh_f16: {
12902 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12903 Int = Intrinsic::aarch64_neon_fmax;
12904 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12905 }
12906 case NEON::BI__builtin_neon_vmin_v:
12907 case NEON::BI__builtin_neon_vminq_v:
12908 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12909 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12910 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12911 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12912 case NEON::BI__builtin_neon_vminh_f16: {
12913 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12914 Int = Intrinsic::aarch64_neon_fmin;
12915 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12916 }
12917 case NEON::BI__builtin_neon_vabd_v:
12918 case NEON::BI__builtin_neon_vabdq_v:
12919 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12920 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12921 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12922 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12923 case NEON::BI__builtin_neon_vpadal_v:
12924 case NEON::BI__builtin_neon_vpadalq_v: {
12925 unsigned ArgElts = VTy->getNumElements();
12926 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12927 unsigned BitWidth = EltTy->getBitWidth();
12928 auto *ArgTy = llvm::FixedVectorType::get(
12929 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12930 llvm::Type* Tys[2] = { VTy, ArgTy };
12931 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12932 llvm::SmallVector<llvm::Value*, 1> TmpOps;
12933 TmpOps.push_back(Ops[1]);
12934 Function *F = CGM.getIntrinsic(Int, Tys);
12935 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12936 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12937 return Builder.CreateAdd(tmp, addend);
12938 }
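// Note: there is no direct IR intrinsic for the accumulating pairwise add, so
// vpadal is split into a pairwise widening add (uaddlp/saddlp) on the source
// vector followed by an ordinary vector add with the accumulator.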
12939 case NEON::BI__builtin_neon_vpmin_v:
12940 case NEON::BI__builtin_neon_vpminq_v:
12941 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12942 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12943 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12944 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12945 case NEON::BI__builtin_neon_vpmax_v:
12946 case NEON::BI__builtin_neon_vpmaxq_v:
12947 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12948 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12949 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12950 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12951 case NEON::BI__builtin_neon_vminnm_v:
12952 case NEON::BI__builtin_neon_vminnmq_v:
12953 Int = Intrinsic::aarch64_neon_fminnm;
12954 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12955 case NEON::BI__builtin_neon_vminnmh_f16:
12956 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12957 Int = Intrinsic::aarch64_neon_fminnm;
12958 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12959 case NEON::BI__builtin_neon_vmaxnm_v:
12960 case NEON::BI__builtin_neon_vmaxnmq_v:
12961 Int = Intrinsic::aarch64_neon_fmaxnm;
12962 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12963 case NEON::BI__builtin_neon_vmaxnmh_f16:
12964 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12965 Int = Intrinsic::aarch64_neon_fmaxnm;
12966 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12967 case NEON::BI__builtin_neon_vrecpss_f32: {
12968 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12969 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12970 Ops, "vrecps");
12971 }
12972 case NEON::BI__builtin_neon_vrecpsd_f64:
12973 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12974 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12975 Ops, "vrecps");
12976 case NEON::BI__builtin_neon_vrecpsh_f16:
12977 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12978 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12979 Ops, "vrecps");
12980 case NEON::BI__builtin_neon_vqshrun_n_v:
12981 Int = Intrinsic::aarch64_neon_sqshrun;
12982 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12983 case NEON::BI__builtin_neon_vqrshrun_n_v:
12984 Int = Intrinsic::aarch64_neon_sqrshrun;
12985 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12986 case NEON::BI__builtin_neon_vqshrn_n_v:
12987 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12988 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12989 case NEON::BI__builtin_neon_vrshrn_n_v:
12990 Int = Intrinsic::aarch64_neon_rshrn;
12991 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12992 case NEON::BI__builtin_neon_vqrshrn_n_v:
12993 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12994 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12995 case NEON::BI__builtin_neon_vrndah_f16: {
12996 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12997 Int = Builder.getIsFPConstrained()
12998 ? Intrinsic::experimental_constrained_round
12999 : Intrinsic::round;
13000 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
13001 }
13002 case NEON::BI__builtin_neon_vrnda_v:
13003 case NEON::BI__builtin_neon_vrndaq_v: {
13004 Int = Builder.getIsFPConstrained()
13005 ? Intrinsic::experimental_constrained_round
13006 : Intrinsic::round;
13007 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
13008 }
13009 case NEON::BI__builtin_neon_vrndih_f16: {
13010 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13011 Int = Builder.getIsFPConstrained()
13012 ? Intrinsic::experimental_constrained_nearbyint
13013 : Intrinsic::nearbyint;
13014 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
13015 }
13016 case NEON::BI__builtin_neon_vrndmh_f16: {
13017 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13018 Int = Builder.getIsFPConstrained()
13019 ? Intrinsic::experimental_constrained_floor
13020 : Intrinsic::floor;
13021 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
13022 }
13023 case NEON::BI__builtin_neon_vrndm_v:
13024 case NEON::BI__builtin_neon_vrndmq_v: {
13025 Int = Builder.getIsFPConstrained()
13026 ? Intrinsic::experimental_constrained_floor
13027 : Intrinsic::floor;
13028 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
13029 }
13030 case NEON::BI__builtin_neon_vrndnh_f16: {
13031 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13032 Int = Builder.getIsFPConstrained()
13033 ? Intrinsic::experimental_constrained_roundeven
13034 : Intrinsic::roundeven;
13035 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
13036 }
13037 case NEON::BI__builtin_neon_vrndn_v:
13038 case NEON::BI__builtin_neon_vrndnq_v: {
13039 Int = Builder.getIsFPConstrained()
13040 ? Intrinsic::experimental_constrained_roundeven
13041 : Intrinsic::roundeven;
13042 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
13043 }
13044 case NEON::BI__builtin_neon_vrndns_f32: {
13045 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13046 Int = Builder.getIsFPConstrained()
13047 ? Intrinsic::experimental_constrained_roundeven
13048 : Intrinsic::roundeven;
13049 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
13050 }
13051 case NEON::BI__builtin_neon_vrndph_f16: {
13052 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13053 Int = Builder.getIsFPConstrained()
13054 ? Intrinsic::experimental_constrained_ceil
13055 : Intrinsic::ceil;
13056 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
13057 }
13058 case NEON::BI__builtin_neon_vrndp_v:
13059 case NEON::BI__builtin_neon_vrndpq_v: {
13060 Int = Builder.getIsFPConstrained()
13061 ? Intrinsic::experimental_constrained_ceil
13062 : Intrinsic::ceil;
13063 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13064 }
13065 case NEON::BI__builtin_neon_vrndxh_f16: {
13066 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13067 Int = Builder.getIsFPConstrained()
13068 ? Intrinsic::experimental_constrained_rint
13069 : Intrinsic::rint;
13070 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13071 }
13072 case NEON::BI__builtin_neon_vrndx_v:
13073 case NEON::BI__builtin_neon_vrndxq_v: {
13074 Int = Builder.getIsFPConstrained()
13075 ? Intrinsic::experimental_constrained_rint
13076 : Intrinsic::rint;
13077 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13078 }
13079 case NEON::BI__builtin_neon_vrndh_f16: {
13080 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13081 Int = Builder.getIsFPConstrained()
13082 ? Intrinsic::experimental_constrained_trunc
13083 : Intrinsic::trunc;
13084 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13085 }
13086 case NEON::BI__builtin_neon_vrnd32x_f32:
13087 case NEON::BI__builtin_neon_vrnd32xq_f32:
13088 case NEON::BI__builtin_neon_vrnd32x_f64:
13089 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13090 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13091 Int = Intrinsic::aarch64_neon_frint32x;
13092 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13093 }
13094 case NEON::BI__builtin_neon_vrnd32z_f32:
13095 case NEON::BI__builtin_neon_vrnd32zq_f32:
13096 case NEON::BI__builtin_neon_vrnd32z_f64:
13097 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13098 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13099 Int = Intrinsic::aarch64_neon_frint32z;
13100 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13101 }
13102 case NEON::BI__builtin_neon_vrnd64x_f32:
13103 case NEON::BI__builtin_neon_vrnd64xq_f32:
13104 case NEON::BI__builtin_neon_vrnd64x_f64:
13105 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13106 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13107 Int = Intrinsic::aarch64_neon_frint64x;
13108 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13109 }
13110 case NEON::BI__builtin_neon_vrnd64z_f32:
13111 case NEON::BI__builtin_neon_vrnd64zq_f32:
13112 case NEON::BI__builtin_neon_vrnd64z_f64:
13113 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13114 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13115 Int = Intrinsic::aarch64_neon_frint64z;
13116 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13117 }
13118 case NEON::BI__builtin_neon_vrnd_v:
13119 case NEON::BI__builtin_neon_vrndq_v: {
13120 Int = Builder.getIsFPConstrained()
13121 ? Intrinsic::experimental_constrained_trunc
13122 : Intrinsic::trunc;
13123 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13124 }
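// Note: the vrnd* family above maps onto the generic rounding intrinsics (or
// their constrained counterparts under strict FP): vrnda -> round, vrndi ->
// nearbyint, vrndm -> floor, vrndn -> roundeven, vrndp -> ceil, vrndx ->
// rint, vrnd -> trunc. The frint32/frint64 bounded-range variants have no
// generic equivalent and use AArch64-specific intrinsics instead.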
13125 case NEON::BI__builtin_neon_vcvt_f64_v:
13126 case NEON::BI__builtin_neon_vcvtq_f64_v:
13127 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13128 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13129 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13130 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13131 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13132 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13133 "unexpected vcvt_f64_f32 builtin");
13134 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13135 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13136
13137 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13138 }
13139 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13140 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13141 "unexpected vcvt_f32_f64 builtin");
13142 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13143 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13144
13145 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13146 }
13147 case NEON::BI__builtin_neon_vcvt_s32_v:
13148 case NEON::BI__builtin_neon_vcvt_u32_v:
13149 case NEON::BI__builtin_neon_vcvt_s64_v:
13150 case NEON::BI__builtin_neon_vcvt_u64_v:
13151 case NEON::BI__builtin_neon_vcvt_s16_f16:
13152 case NEON::BI__builtin_neon_vcvt_u16_f16:
13153 case NEON::BI__builtin_neon_vcvtq_s32_v:
13154 case NEON::BI__builtin_neon_vcvtq_u32_v:
13155 case NEON::BI__builtin_neon_vcvtq_s64_v:
13156 case NEON::BI__builtin_neon_vcvtq_u64_v:
13157 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13158 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13159 Int =
13160 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13161 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13162 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13163 }
13164 case NEON::BI__builtin_neon_vcvta_s16_f16:
13165 case NEON::BI__builtin_neon_vcvta_u16_f16:
13166 case NEON::BI__builtin_neon_vcvta_s32_v:
13167 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13168 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13169 case NEON::BI__builtin_neon_vcvta_u32_v:
13170 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13171 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13172 case NEON::BI__builtin_neon_vcvta_s64_v:
13173 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13174 case NEON::BI__builtin_neon_vcvta_u64_v:
13175 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13176 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13177 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13178 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13179 }
13180 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13181 case NEON::BI__builtin_neon_vcvtm_s32_v:
13182 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13183 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13184 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13185 case NEON::BI__builtin_neon_vcvtm_u32_v:
13186 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13187 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13188 case NEON::BI__builtin_neon_vcvtm_s64_v:
13189 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13190 case NEON::BI__builtin_neon_vcvtm_u64_v:
13191 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13192 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13193 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13194 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13195 }
13196 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13197 case NEON::BI__builtin_neon_vcvtn_s32_v:
13198 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13199 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13200 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13201 case NEON::BI__builtin_neon_vcvtn_u32_v:
13202 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13203 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13204 case NEON::BI__builtin_neon_vcvtn_s64_v:
13205 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13206 case NEON::BI__builtin_neon_vcvtn_u64_v:
13207 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13208 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13209 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13210 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13211 }
13212 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13213 case NEON::BI__builtin_neon_vcvtp_s32_v:
13214 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13215 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13216 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13217 case NEON::BI__builtin_neon_vcvtp_u32_v:
13218 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13219 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13220 case NEON::BI__builtin_neon_vcvtp_s64_v:
13221 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13222 case NEON::BI__builtin_neon_vcvtp_u64_v:
13223 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13224 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13225 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13226 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13227 }
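// Note: vcvta/vcvtm/vcvtn/vcvtp pick the float-to-int conversion intrinsic by
// rounding mode (fcvta* ties-away, fcvtm* toward -inf, fcvtn* to nearest
// even, fcvtp* toward +inf), with the signedness selecting the u/s form,
// while the plain vcvt_s*/u* cases above use the truncating fcvtz*
// intrinsics.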
13228 case NEON::BI__builtin_neon_vmulx_v:
13229 case NEON::BI__builtin_neon_vmulxq_v: {
13230 Int = Intrinsic::aarch64_neon_fmulx;
13231 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13232 }
13233 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13234 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13235 // vmulx_lane should be mapped to Neon scalar mulx after
13236 // extracting the scalar element
13237 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13238 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13239 Ops.pop_back();
13240 Int = Intrinsic::aarch64_neon_fmulx;
13241 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13242 }
13243 case NEON::BI__builtin_neon_vmul_lane_v:
13244 case NEON::BI__builtin_neon_vmul_laneq_v: {
13245 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13246 bool Quad = false;
13247 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13248 Quad = true;
13249 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13250 llvm::FixedVectorType *VTy =
13251 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13252 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13253 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13254 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13255 return Builder.CreateBitCast(Result, Ty);
13256 }
13257 case NEON::BI__builtin_neon_vnegd_s64:
13258 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13259 case NEON::BI__builtin_neon_vnegh_f16:
13260 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13261 case NEON::BI__builtin_neon_vpmaxnm_v:
13262 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13263 Int = Intrinsic::aarch64_neon_fmaxnmp;
13264 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13265 }
13266 case NEON::BI__builtin_neon_vpminnm_v:
13267 case NEON::BI__builtin_neon_vpminnmq_v: {
13268 Int = Intrinsic::aarch64_neon_fminnmp;
13269 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13270 }
13271 case NEON::BI__builtin_neon_vsqrth_f16: {
13272 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13273 Int = Builder.getIsFPConstrained()
13274 ? Intrinsic::experimental_constrained_sqrt
13275 : Intrinsic::sqrt;
13276 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13277 }
13278 case NEON::BI__builtin_neon_vsqrt_v:
13279 case NEON::BI__builtin_neon_vsqrtq_v: {
13280 Int = Builder.getIsFPConstrained()
13281 ? Intrinsic::experimental_constrained_sqrt
13282 : Intrinsic::sqrt;
13283 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13284 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13285 }
13286 case NEON::BI__builtin_neon_vrbit_v:
13287 case NEON::BI__builtin_neon_vrbitq_v: {
13288 Int = Intrinsic::bitreverse;
13289 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13290 }
13291 case NEON::BI__builtin_neon_vaddv_u8:
13292 // FIXME: These are handled by the AArch64 scalar code.
13293 usgn = true;
13294 [[fallthrough]];
13295 case NEON::BI__builtin_neon_vaddv_s8: {
13296 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13297 Ty = Int32Ty;
13298 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13299 llvm::Type *Tys[2] = { Ty, VTy };
13300 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13301 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13302 return Builder.CreateTrunc(Ops[0], Int8Ty);
13303 }
13304 case NEON::BI__builtin_neon_vaddv_u16:
13305 usgn = true;
13306 [[fallthrough]];
13307 case NEON::BI__builtin_neon_vaddv_s16: {
13308 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13309 Ty = Int32Ty;
13310 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13311 llvm::Type *Tys[2] = { Ty, VTy };
13312 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13313 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13314 return Builder.CreateTrunc(Ops[0], Int16Ty);
13315 }
13316 case NEON::BI__builtin_neon_vaddvq_u8:
13317 usgn = true;
13318 [[fallthrough]];
13319 case NEON::BI__builtin_neon_vaddvq_s8: {
13320 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13321 Ty = Int32Ty;
13322 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13323 llvm::Type *Tys[2] = { Ty, VTy };
13324 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13325 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13326 return Builder.CreateTrunc(Ops[0], Int8Ty);
13327 }
13328 case NEON::BI__builtin_neon_vaddvq_u16:
13329 usgn = true;
13330 [[fallthrough]];
13331 case NEON::BI__builtin_neon_vaddvq_s16: {
13332 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13333 Ty = Int32Ty;
13334 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13335 llvm::Type *Tys[2] = { Ty, VTy };
13336 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13337 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13338 return Builder.CreateTrunc(Ops[0], Int16Ty);
13339 }
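// Note: the across-vector reductions in this region (vaddv, vmaxv, vminv,
// vaddlv, ...) all follow the same shape: the aarch64.neon.*v intrinsic is
// overloaded to return i32, and the result is truncated back down whenever
// the element type is narrower than 32 bits.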
13340 case NEON::BI__builtin_neon_vmaxv_u8: {
13341 Int = Intrinsic::aarch64_neon_umaxv;
13342 Ty = Int32Ty;
13343 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13344 llvm::Type *Tys[2] = { Ty, VTy };
13345 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13346 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13347 return Builder.CreateTrunc(Ops[0], Int8Ty);
13348 }
13349 case NEON::BI__builtin_neon_vmaxv_u16: {
13350 Int = Intrinsic::aarch64_neon_umaxv;
13351 Ty = Int32Ty;
13352 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13353 llvm::Type *Tys[2] = { Ty, VTy };
13354 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13355 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13356 return Builder.CreateTrunc(Ops[0], Int16Ty);
13357 }
13358 case NEON::BI__builtin_neon_vmaxvq_u8: {
13359 Int = Intrinsic::aarch64_neon_umaxv;
13360 Ty = Int32Ty;
13361 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13362 llvm::Type *Tys[2] = { Ty, VTy };
13363 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13364 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13365 return Builder.CreateTrunc(Ops[0], Int8Ty);
13366 }
13367 case NEON::BI__builtin_neon_vmaxvq_u16: {
13368 Int = Intrinsic::aarch64_neon_umaxv;
13369 Ty = Int32Ty;
13370 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13371 llvm::Type *Tys[2] = { Ty, VTy };
13372 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13373 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13374 return Builder.CreateTrunc(Ops[0], Int16Ty);
13375 }
13376 case NEON::BI__builtin_neon_vmaxv_s8: {
13377 Int = Intrinsic::aarch64_neon_smaxv;
13378 Ty = Int32Ty;
13379 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13380 llvm::Type *Tys[2] = { Ty, VTy };
13381 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13382 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13383 return Builder.CreateTrunc(Ops[0], Int8Ty);
13384 }
13385 case NEON::BI__builtin_neon_vmaxv_s16: {
13386 Int = Intrinsic::aarch64_neon_smaxv;
13387 Ty = Int32Ty;
13388 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13389 llvm::Type *Tys[2] = { Ty, VTy };
13390 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13391 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13392 return Builder.CreateTrunc(Ops[0], Int16Ty);
13393 }
13394 case NEON::BI__builtin_neon_vmaxvq_s8: {
13395 Int = Intrinsic::aarch64_neon_smaxv;
13396 Ty = Int32Ty;
13397 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13398 llvm::Type *Tys[2] = { Ty, VTy };
13399 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13400 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13401 return Builder.CreateTrunc(Ops[0], Int8Ty);
13402 }
13403 case NEON::BI__builtin_neon_vmaxvq_s16: {
13404 Int = Intrinsic::aarch64_neon_smaxv;
13405 Ty = Int32Ty;
13406 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13407 llvm::Type *Tys[2] = { Ty, VTy };
13408 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13409 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13410 return Builder.CreateTrunc(Ops[0], Int16Ty);
13411 }
13412 case NEON::BI__builtin_neon_vmaxv_f16: {
13413 Int = Intrinsic::aarch64_neon_fmaxv;
13414 Ty = HalfTy;
13415 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13416 llvm::Type *Tys[2] = { Ty, VTy };
13417 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13418 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13419 return Builder.CreateTrunc(Ops[0], HalfTy);
13420 }
13421 case NEON::BI__builtin_neon_vmaxvq_f16: {
13422 Int = Intrinsic::aarch64_neon_fmaxv;
13423 Ty = HalfTy;
13424 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13425 llvm::Type *Tys[2] = { Ty, VTy };
13426 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13427 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13428 return Builder.CreateTrunc(Ops[0], HalfTy);
13429 }
13430 case NEON::BI__builtin_neon_vminv_u8: {
13431 Int = Intrinsic::aarch64_neon_uminv;
13432 Ty = Int32Ty;
13433 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13434 llvm::Type *Tys[2] = { Ty, VTy };
13435 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13436 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13437 return Builder.CreateTrunc(Ops[0], Int8Ty);
13438 }
13439 case NEON::BI__builtin_neon_vminv_u16: {
13440 Int = Intrinsic::aarch64_neon_uminv;
13441 Ty = Int32Ty;
13442 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13443 llvm::Type *Tys[2] = { Ty, VTy };
13444 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13445 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13446 return Builder.CreateTrunc(Ops[0], Int16Ty);
13447 }
13448 case NEON::BI__builtin_neon_vminvq_u8: {
13449 Int = Intrinsic::aarch64_neon_uminv;
13450 Ty = Int32Ty;
13451 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13452 llvm::Type *Tys[2] = { Ty, VTy };
13453 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13454 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13455 return Builder.CreateTrunc(Ops[0], Int8Ty);
13456 }
13457 case NEON::BI__builtin_neon_vminvq_u16: {
13458 Int = Intrinsic::aarch64_neon_uminv;
13459 Ty = Int32Ty;
13460 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13461 llvm::Type *Tys[2] = { Ty, VTy };
13462 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13463 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13464 return Builder.CreateTrunc(Ops[0], Int16Ty);
13465 }
13466 case NEON::BI__builtin_neon_vminv_s8: {
13467 Int = Intrinsic::aarch64_neon_sminv;
13468 Ty = Int32Ty;
13469 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13470 llvm::Type *Tys[2] = { Ty, VTy };
13471 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13472 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13473 return Builder.CreateTrunc(Ops[0], Int8Ty);
13474 }
13475 case NEON::BI__builtin_neon_vminv_s16: {
13476 Int = Intrinsic::aarch64_neon_sminv;
13477 Ty = Int32Ty;
13478 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13479 llvm::Type *Tys[2] = { Ty, VTy };
13480 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13481 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13482 return Builder.CreateTrunc(Ops[0], Int16Ty);
13483 }
13484 case NEON::BI__builtin_neon_vminvq_s8: {
13485 Int = Intrinsic::aarch64_neon_sminv;
13486 Ty = Int32Ty;
13487 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13488 llvm::Type *Tys[2] = { Ty, VTy };
13489 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13490 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13491 return Builder.CreateTrunc(Ops[0], Int8Ty);
13492 }
13493 case NEON::BI__builtin_neon_vminvq_s16: {
13494 Int = Intrinsic::aarch64_neon_sminv;
13495 Ty = Int32Ty;
13496 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13497 llvm::Type *Tys[2] = { Ty, VTy };
13498 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13499 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13500 return Builder.CreateTrunc(Ops[0], Int16Ty);
13501 }
13502 case NEON::BI__builtin_neon_vminv_f16: {
13503 Int = Intrinsic::aarch64_neon_fminv;
13504 Ty = HalfTy;
13505 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13506 llvm::Type *Tys[2] = { Ty, VTy };
13507 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13508 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13509 return Builder.CreateTrunc(Ops[0], HalfTy);
13510 }
13511 case NEON::BI__builtin_neon_vminvq_f16: {
13512 Int = Intrinsic::aarch64_neon_fminv;
13513 Ty = HalfTy;
13514 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13515 llvm::Type *Tys[2] = { Ty, VTy };
13516 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13517 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13518 return Builder.CreateTrunc(Ops[0], HalfTy);
13519 }
13520 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13521 Int = Intrinsic::aarch64_neon_fmaxnmv;
13522 Ty = HalfTy;
13523 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13524 llvm::Type *Tys[2] = { Ty, VTy };
13525 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13526 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13527 return Builder.CreateTrunc(Ops[0], HalfTy);
13528 }
13529 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13530 Int = Intrinsic::aarch64_neon_fmaxnmv;
13531 Ty = HalfTy;
13532 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13533 llvm::Type *Tys[2] = { Ty, VTy };
13534 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13535 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13536 return Builder.CreateTrunc(Ops[0], HalfTy);
13537 }
13538 case NEON::BI__builtin_neon_vminnmv_f16: {
13539 Int = Intrinsic::aarch64_neon_fminnmv;
13540 Ty = HalfTy;
13541 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13542 llvm::Type *Tys[2] = { Ty, VTy };
13543 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13544 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13545 return Builder.CreateTrunc(Ops[0], HalfTy);
13546 }
13547 case NEON::BI__builtin_neon_vminnmvq_f16: {
13548 Int = Intrinsic::aarch64_neon_fminnmv;
13549 Ty = HalfTy;
13550 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13551 llvm::Type *Tys[2] = { Ty, VTy };
13552 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13553 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13554 return Builder.CreateTrunc(Ops[0], HalfTy);
13555 }
13556 case NEON::BI__builtin_neon_vmul_n_f64: {
13557 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13558 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13559 return Builder.CreateFMul(Ops[0], RHS);
13560 }
13561 case NEON::BI__builtin_neon_vaddlv_u8: {
13562 Int = Intrinsic::aarch64_neon_uaddlv;
13563 Ty = Int32Ty;
13564 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13565 llvm::Type *Tys[2] = { Ty, VTy };
13566 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13567 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13568 return Builder.CreateTrunc(Ops[0], Int16Ty);
13569 }
13570 case NEON::BI__builtin_neon_vaddlv_u16: {
13571 Int = Intrinsic::aarch64_neon_uaddlv;
13572 Ty = Int32Ty;
13573 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13574 llvm::Type *Tys[2] = { Ty, VTy };
13575 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13576 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13577 }
13578 case NEON::BI__builtin_neon_vaddlvq_u8: {
13579 Int = Intrinsic::aarch64_neon_uaddlv;
13580 Ty = Int32Ty;
13581 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13582 llvm::Type *Tys[2] = { Ty, VTy };
13583 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13584 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13585 return Builder.CreateTrunc(Ops[0], Int16Ty);
13586 }
13587 case NEON::BI__builtin_neon_vaddlvq_u16: {
13588 Int = Intrinsic::aarch64_neon_uaddlv;
13589 Ty = Int32Ty;
13590 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13591 llvm::Type *Tys[2] = { Ty, VTy };
13592 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13593 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13594 }
13595 case NEON::BI__builtin_neon_vaddlv_s8: {
13596 Int = Intrinsic::aarch64_neon_saddlv;
13597 Ty = Int32Ty;
13598 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13599 llvm::Type *Tys[2] = { Ty, VTy };
13600 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13601 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13602 return Builder.CreateTrunc(Ops[0], Int16Ty);
13603 }
13604 case NEON::BI__builtin_neon_vaddlv_s16: {
13605 Int = Intrinsic::aarch64_neon_saddlv;
13606 Ty = Int32Ty;
13607 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13608 llvm::Type *Tys[2] = { Ty, VTy };
13609 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13610 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13611 }
13612 case NEON::BI__builtin_neon_vaddlvq_s8: {
13613 Int = Intrinsic::aarch64_neon_saddlv;
13614 Ty = Int32Ty;
13615 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13616 llvm::Type *Tys[2] = { Ty, VTy };
13617 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13618 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13619 return Builder.CreateTrunc(Ops[0], Int16Ty);
13620 }
13621 case NEON::BI__builtin_neon_vaddlvq_s16: {
13622 Int = Intrinsic::aarch64_neon_saddlv;
13623 Ty = Int32Ty;
13624 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13625 llvm::Type *Tys[2] = { Ty, VTy };
13626 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13627 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13628 }
13629 case NEON::BI__builtin_neon_vsri_n_v:
13630 case NEON::BI__builtin_neon_vsriq_n_v: {
13631 Int = Intrinsic::aarch64_neon_vsri;
13632 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13633 return EmitNeonCall(Intrin, Ops, "vsri_n");
13634 }
13635 case NEON::BI__builtin_neon_vsli_n_v:
13636 case NEON::BI__builtin_neon_vsliq_n_v: {
13637 Int = Intrinsic::aarch64_neon_vsli;
13638 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13639 return EmitNeonCall(Intrin, Ops, "vsli_n");
13640 }
13641 case NEON::BI__builtin_neon_vsra_n_v:
13642 case NEON::BI__builtin_neon_vsraq_n_v:
13643 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13644 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13645 return Builder.CreateAdd(Ops[0], Ops[1]);
13646 case NEON::BI__builtin_neon_vrsra_n_v:
13647 case NEON::BI__builtin_neon_vrsraq_n_v: {
13648 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13649 SmallVector<llvm::Value*,2> TmpOps;
13650 TmpOps.push_back(Ops[1]);
13651 TmpOps.push_back(Ops[2]);
13652 Function* F = CGM.getIntrinsic(Int, Ty);
13653 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13654 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13655 return Builder.CreateAdd(Ops[0], tmp);
13656 }
13657 case NEON::BI__builtin_neon_vld1_v:
13658 case NEON::BI__builtin_neon_vld1q_v: {
13659 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13660 }
13661 case NEON::BI__builtin_neon_vst1_v:
13662 case NEON::BI__builtin_neon_vst1q_v:
13663 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13664 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13665 case NEON::BI__builtin_neon_vld1_lane_v:
13666 case NEON::BI__builtin_neon_vld1q_lane_v: {
13667 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13668 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13669 PtrOp0.getAlignment());
13670 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13671 }
13672 case NEON::BI__builtin_neon_vldap1_lane_s64:
13673 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13674 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13675 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13676 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13677 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13678 Ops[0] = LI;
13679 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13680 }
13681 case NEON::BI__builtin_neon_vld1_dup_v:
13682 case NEON::BI__builtin_neon_vld1q_dup_v: {
13683 Value *V = PoisonValue::get(Ty);
13684 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13685 PtrOp0.getAlignment());
13686 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13687 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13688 return EmitNeonSplat(Ops[0], CI);
13689 }
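// Note: vld1_dup is open coded as a scalar load, an insertelement into lane 0
// of a poison vector, and a splat of lane 0 across the destination type, so
// no target intrinsic is required.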
13690 case NEON::BI__builtin_neon_vst1_lane_v:
13691 case NEON::BI__builtin_neon_vst1q_lane_v:
13692 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13693 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13694 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13695 case NEON::BI__builtin_neon_vstl1_lane_s64:
13696 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13697 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13698 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13699 llvm::StoreInst *SI =
13700 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13701 SI->setAtomic(llvm::AtomicOrdering::Release);
13702 return SI;
13703 }
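// For the structured loads below, the aarch64.neon.ldN / ldNr intrinsics return
// an aggregate of N vectors, which is then stored through the result pointer
// passed as the first operand.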
13704 case NEON::BI__builtin_neon_vld2_v:
13705 case NEON::BI__builtin_neon_vld2q_v: {
13706 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13707 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13708 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13709 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13710 }
13711 case NEON::BI__builtin_neon_vld3_v:
13712 case NEON::BI__builtin_neon_vld3q_v: {
13713 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13714 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13715 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13716 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13717 }
13718 case NEON::BI__builtin_neon_vld4_v:
13719 case NEON::BI__builtin_neon_vld4q_v: {
13720 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13721 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13722 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13723 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13724 }
13725 case NEON::BI__builtin_neon_vld2_dup_v:
13726 case NEON::BI__builtin_neon_vld2q_dup_v: {
13727 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13728 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13729 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13730 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13731 }
13732 case NEON::BI__builtin_neon_vld3_dup_v:
13733 case NEON::BI__builtin_neon_vld3q_dup_v: {
13734 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13735 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13736 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13737 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13738 }
13739 case NEON::BI__builtin_neon_vld4_dup_v:
13740 case NEON::BI__builtin_neon_vld4q_dup_v: {
13741 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13742 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13743 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13744 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13745 }
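// For the lane variants, std::rotate moves the source pointer to the end so the
// remaining operands line up with the aarch64.neon.ldNlane signature
// (vectors..., i64 lane, ptr); the returned aggregate is stored to Ops[0].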
13746 case NEON::BI__builtin_neon_vld2_lane_v:
13747 case NEON::BI__builtin_neon_vld2q_lane_v: {
13748 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13749 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13750 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13751 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13752 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13753 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13754 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13755 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13756 }
13757 case NEON::BI__builtin_neon_vld3_lane_v:
13758 case NEON::BI__builtin_neon_vld3q_lane_v: {
13759 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13760 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13761 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13762 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13763 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13764 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13765 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13766 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13767 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13768 }
13769 case NEON::BI__builtin_neon_vld4_lane_v:
13770 case NEON::BI__builtin_neon_vld4q_lane_v: {
13771 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13772 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13773 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13774 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13775 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13776 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13777 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13778 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13779 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13780 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13781 }
13782 case NEON::BI__builtin_neon_vst2_v:
13783 case NEON::BI__builtin_neon_vst2q_v: {
13784 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13785 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13786 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13787 Ops, "");
13788 }
13789 case NEON::BI__builtin_neon_vst2_lane_v:
13790 case NEON::BI__builtin_neon_vst2q_lane_v: {
13791 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13792 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13793 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13794 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13795 Ops, "");
13796 }
13797 case NEON::BI__builtin_neon_vst3_v:
13798 case NEON::BI__builtin_neon_vst3q_v: {
13799 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13800 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13801 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13802 Ops, "");
13803 }
13804 case NEON::BI__builtin_neon_vst3_lane_v:
13805 case NEON::BI__builtin_neon_vst3q_lane_v: {
13806 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13807 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13808 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13809 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13810 Ops, "");
13811 }
13812 case NEON::BI__builtin_neon_vst4_v:
13813 case NEON::BI__builtin_neon_vst4q_v: {
13814 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13815 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13816 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13817 Ops, "");
13818 }
13819 case NEON::BI__builtin_neon_vst4_lane_v:
13820 case NEON::BI__builtin_neon_vst4q_lane_v: {
13821 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13822 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13823 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13824 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13825 Ops, "");
13826 }
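// vtrn/vuzp/vzip are emitted as two shufflevectors written to the two halves of
// the result pointer. For a 4-element vector the masks are, for example,
// <0,4,2,6>/<1,5,3,7> (trn), <0,2,4,6>/<1,3,5,7> (uzp) and <0,4,1,5>/<2,6,3,7>
// (zip).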
13827 case NEON::BI__builtin_neon_vtrn_v:
13828 case NEON::BI__builtin_neon_vtrnq_v: {
13829 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13830 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13831 Value *SV = nullptr;
13832
13833 for (unsigned vi = 0; vi != 2; ++vi) {
13834 SmallVector<int, 16> Indices;
13835 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13836 Indices.push_back(i+vi);
13837 Indices.push_back(i+e+vi);
13838 }
13839 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13840 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13841 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13842 }
13843 return SV;
13844 }
13845 case NEON::BI__builtin_neon_vuzp_v:
13846 case NEON::BI__builtin_neon_vuzpq_v: {
13847 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13848 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13849 Value *SV = nullptr;
13850
13851 for (unsigned vi = 0; vi != 2; ++vi) {
13852 SmallVector<int, 16> Indices;
13853 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13854 Indices.push_back(2*i+vi);
13855
13856 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13857 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13858 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13859 }
13860 return SV;
13861 }
13862 case NEON::BI__builtin_neon_vzip_v:
13863 case NEON::BI__builtin_neon_vzipq_v: {
13864 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13865 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13866 Value *SV = nullptr;
13867
13868 for (unsigned vi = 0; vi != 2; ++vi) {
13869 SmallVector<int, 16> Indices;
13870 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13871 Indices.push_back((i + vi*e) >> 1);
13872 Indices.push_back(((i + vi*e) >> 1)+e);
13873 }
13874 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13875 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13876 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13877 }
13878 return SV;
13879 }
13880 case NEON::BI__builtin_neon_vqtbl1q_v: {
13881 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13882 Ops, "vtbl1");
13883 }
13884 case NEON::BI__builtin_neon_vqtbl2q_v: {
13885 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13886 Ops, "vtbl2");
13887 }
13888 case NEON::BI__builtin_neon_vqtbl3q_v: {
13889 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13890 Ops, "vtbl3");
13891 }
13892 case NEON::BI__builtin_neon_vqtbl4q_v: {
13893 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13894 Ops, "vtbl4");
13895 }
13896 case NEON::BI__builtin_neon_vqtbx1q_v: {
13897 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13898 Ops, "vtbx1");
13899 }
13900 case NEON::BI__builtin_neon_vqtbx2q_v: {
13901 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13902 Ops, "vtbx2");
13903 }
13904 case NEON::BI__builtin_neon_vqtbx3q_v: {
13905 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13906 Ops, "vtbx3");
13907 }
13908 case NEON::BI__builtin_neon_vqtbx4q_v: {
13909 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13910 Ops, "vtbx4");
13911 }
13912 case NEON::BI__builtin_neon_vsqadd_v:
13913 case NEON::BI__builtin_neon_vsqaddq_v: {
13914 Int = Intrinsic::aarch64_neon_usqadd;
13915 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13916 }
13917 case NEON::BI__builtin_neon_vuqadd_v:
13918 case NEON::BI__builtin_neon_vuqaddq_v: {
13919 Int = Intrinsic::aarch64_neon_suqadd;
13920 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13921 }
13922
13923 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13924 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13925 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13926 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13927 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13928 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13929 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13930 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13931 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13932 llvm::Type *Tys[2];
13933 Tys[0] = Ty;
13934 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13935 /*isQuad*/ false));
13936 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13937 }
13938 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13939 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13940 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13941 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13942 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13943 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13944 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13945 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13946 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13947 llvm::Type *Tys[2];
13948 Tys[0] = Ty;
13949 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13950 /*isQuad*/ true));
13951 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13952 }
13953 case NEON::BI__builtin_neon_vluti2_lane_bf16:
13954 case NEON::BI__builtin_neon_vluti2_lane_f16:
13955 case NEON::BI__builtin_neon_vluti2_lane_p16:
13956 case NEON::BI__builtin_neon_vluti2_lane_p8:
13957 case NEON::BI__builtin_neon_vluti2_lane_s16:
13958 case NEON::BI__builtin_neon_vluti2_lane_s8:
13959 case NEON::BI__builtin_neon_vluti2_lane_u16:
13960 case NEON::BI__builtin_neon_vluti2_lane_u8: {
13961 Int = Intrinsic::aarch64_neon_vluti2_lane;
13962 llvm::Type *Tys[2];
13963 Tys[0] = Ty;
13964 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13965 /*isQuad*/ false));
13966 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13967 }
13968 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13969 case NEON::BI__builtin_neon_vluti2q_lane_f16:
13970 case NEON::BI__builtin_neon_vluti2q_lane_p16:
13971 case NEON::BI__builtin_neon_vluti2q_lane_p8:
13972 case NEON::BI__builtin_neon_vluti2q_lane_s16:
13973 case NEON::BI__builtin_neon_vluti2q_lane_s8:
13974 case NEON::BI__builtin_neon_vluti2q_lane_u16:
13975 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13976 Int = Intrinsic::aarch64_neon_vluti2_lane;
13977 llvm::Type *Tys[2];
13978 Tys[0] = Ty;
13979 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13980 /*isQuad*/ true));
13981 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13982 }
13983 case NEON::BI__builtin_neon_vluti4q_lane_p8:
13984 case NEON::BI__builtin_neon_vluti4q_lane_s8:
13985 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13986 Int = Intrinsic::aarch64_neon_vluti4q_lane;
13987 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
13988 }
13989 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13990 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13991 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13992 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13993 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
13994 }
13995 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13996 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
13997 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
13998 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
13999 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
14000 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
14001 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
14002 }
14003 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
14004 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
14005 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
14006 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
14007 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
14008 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
14009 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
14010 }
14011 case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14012 ExtractLow = true;
14013 LLVM_FALLTHROUGH;
14014 case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14015 case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm:
14016 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14017 llvm::FixedVectorType::get(BFloatTy, 8),
14018 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
14019 case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14020 ExtractLow = true;
14021 LLVM_FALLTHROUGH;
14022 case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14023 case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm:
14024 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14025 llvm::FixedVectorType::get(BFloatTy, 8),
14026 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14027 case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14028 ExtractLow = true;
14029 LLVM_FALLTHROUGH;
14030 case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14031 case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm:
14032 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl1,
14033 llvm::FixedVectorType::get(HalfTy, 8),
14034 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt1");
14035 case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14036 ExtractLow = true;
14037 LLVM_FALLTHROUGH;
14038 case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14039 case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm:
14040 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_cvtl2,
14041 llvm::FixedVectorType::get(HalfTy, 8),
14042 Ops[0]->getType(), ExtractLow, Ops, E, "vbfcvt2");
14043 case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm:
14044 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14045 llvm::FixedVectorType::get(Int8Ty, 8),
14046 Ops[0]->getType(), false, Ops, E, "vfcvtn");
14047 case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm:
14048 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14049 llvm::FixedVectorType::get(Int8Ty, 8),
14050 llvm::FixedVectorType::get(HalfTy, 4), false, Ops,
14051 E, "vfcvtn");
14052 case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm:
14053 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn,
14054 llvm::FixedVectorType::get(Int8Ty, 16),
14055 llvm::FixedVectorType::get(HalfTy, 8), false, Ops,
14056 E, "vfcvtn");
14057 case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14058 llvm::Type *Ty = llvm::FixedVectorType::get(Int8Ty, 16);
14059 Ops[0] = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
14060 Builder.getInt64(0));
14061 return EmitFP8NeonCvtCall(Intrinsic::aarch64_neon_fp8_fcvtn2, Ty,
14062 Ops[1]->getType(), false, Ops, E, "vfcvtn2");
14063 }
14064
14065 case NEON::BI__builtin_neon_vdot_f16_mf8_fpm:
14066 case NEON::BI__builtin_neon_vdotq_f16_mf8_fpm:
14067 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2, false, HalfTy,
14068 Ops, E, "fdot2");
14069 case NEON::BI__builtin_neon_vdot_lane_f16_mf8_fpm:
14070 case NEON::BI__builtin_neon_vdotq_lane_f16_mf8_fpm:
14071 ExtendLaneArg = true;
14072 LLVM_FALLTHROUGH;
14073 case NEON::BI__builtin_neon_vdot_laneq_f16_mf8_fpm:
14074 case NEON::BI__builtin_neon_vdotq_laneq_f16_mf8_fpm:
14075 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot2_lane,
14076 ExtendLaneArg, HalfTy, Ops, E, "fdot2_lane");
14077 case NEON::BI__builtin_neon_vdot_f32_mf8_fpm:
14078 case NEON::BI__builtin_neon_vdotq_f32_mf8_fpm:
14079 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4, false,
14080 FloatTy, Ops, E, "fdot4");
14081 case NEON::BI__builtin_neon_vdot_lane_f32_mf8_fpm:
14082 case NEON::BI__builtin_neon_vdotq_lane_f32_mf8_fpm:
14083 ExtendLaneArg = true;
14084 LLVM_FALLTHROUGH;
14085 case NEON::BI__builtin_neon_vdot_laneq_f32_mf8_fpm:
14086 case NEON::BI__builtin_neon_vdotq_laneq_f32_mf8_fpm:
14087 return EmitFP8NeonFDOTCall(Intrinsic::aarch64_neon_fp8_fdot4_lane,
14088 ExtendLaneArg, FloatTy, Ops, E, "fdot4_lane");
14089
14090 case NEON::BI__builtin_neon_vmlalbq_f16_mf8_fpm:
14091 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalb,
14092 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
14093 "vmlal");
14094 case NEON::BI__builtin_neon_vmlaltq_f16_mf8_fpm:
14095 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalt,
14096 {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E,
14097 "vmlal");
14098 case NEON::BI__builtin_neon_vmlallbbq_f32_mf8_fpm:
14099 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbb,
14100 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
14101 "vmlall");
14102 case NEON::BI__builtin_neon_vmlallbtq_f32_mf8_fpm:
14103 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlallbt,
14104 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
14105 "vmlall");
14106 case NEON::BI__builtin_neon_vmlalltbq_f32_mf8_fpm:
14107 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltb,
14108 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
14109 "vmlall");
14110 case NEON::BI__builtin_neon_vmlallttq_f32_mf8_fpm:
14111 return EmitFP8NeonCall(Intrinsic::aarch64_neon_fp8_fmlalltt,
14112 {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E,
14113 "vmlall");
14114 case NEON::BI__builtin_neon_vmlalbq_lane_f16_mf8_fpm:
14115 ExtendLaneArg = true;
14116 LLVM_FALLTHROUGH;
14117 case NEON::BI__builtin_neon_vmlalbq_laneq_f16_mf8_fpm:
14118 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalb_lane,
14119 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
14120 case NEON::BI__builtin_neon_vmlaltq_lane_f16_mf8_fpm:
14121 ExtendLaneArg = true;
14122 LLVM_FALLTHROUGH;
14123 case NEON::BI__builtin_neon_vmlaltq_laneq_f16_mf8_fpm:
14124 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalt_lane,
14125 ExtendLaneArg, HalfTy, Ops, E, "vmlal_lane");
14126 case NEON::BI__builtin_neon_vmlallbbq_lane_f32_mf8_fpm:
14127 ExtendLaneArg = true;
14128 LLVM_FALLTHROUGH;
14129 case NEON::BI__builtin_neon_vmlallbbq_laneq_f32_mf8_fpm:
14130 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbb_lane,
14131 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
14132 case NEON::BI__builtin_neon_vmlallbtq_lane_f32_mf8_fpm:
14133 ExtendLaneArg = true;
14134 LLVM_FALLTHROUGH;
14135 case NEON::BI__builtin_neon_vmlallbtq_laneq_f32_mf8_fpm:
14136 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlallbt_lane,
14137 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
14138 case NEON::BI__builtin_neon_vmlalltbq_lane_f32_mf8_fpm:
14139 ExtendLaneArg = true;
14140 LLVM_FALLTHROUGH;
14141 case NEON::BI__builtin_neon_vmlalltbq_laneq_f32_mf8_fpm:
14142 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltb_lane,
14143 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
14144 case NEON::BI__builtin_neon_vmlallttq_lane_f32_mf8_fpm:
14145 ExtendLaneArg = true;
14146 LLVM_FALLTHROUGH;
14147 case NEON::BI__builtin_neon_vmlallttq_laneq_f32_mf8_fpm:
14148 return EmitFP8NeonFMLACall(Intrinsic::aarch64_neon_fp8_fmlalltt_lane,
14149 ExtendLaneArg, FloatTy, Ops, E, "vmlall_lane");
14150 case NEON::BI__builtin_neon_vamin_f16:
14151 case NEON::BI__builtin_neon_vaminq_f16:
14152 case NEON::BI__builtin_neon_vamin_f32:
14153 case NEON::BI__builtin_neon_vaminq_f32:
14154 case NEON::BI__builtin_neon_vaminq_f64: {
14155 Int = Intrinsic::aarch64_neon_famin;
14156 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
14157 }
14158 case NEON::BI__builtin_neon_vamax_f16:
14159 case NEON::BI__builtin_neon_vamaxq_f16:
14160 case NEON::BI__builtin_neon_vamax_f32:
14161 case NEON::BI__builtin_neon_vamaxq_f32:
14162 case NEON::BI__builtin_neon_vamaxq_f64: {
14163 Int = Intrinsic::aarch64_neon_famax;
14164 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
14165 }
14166 case NEON::BI__builtin_neon_vscale_f16:
14167 case NEON::BI__builtin_neon_vscaleq_f16:
14168 case NEON::BI__builtin_neon_vscale_f32:
14169 case NEON::BI__builtin_neon_vscaleq_f32:
14170 case NEON::BI__builtin_neon_vscaleq_f64: {
14171 Int = Intrinsic::aarch64_neon_fp8_fscale;
14172 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
14173 }
14174 }
14175}
14176
14177Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
14178 const CallExpr *E) {
14179 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
14180 BuiltinID == BPF::BI__builtin_btf_type_id ||
14181 BuiltinID == BPF::BI__builtin_preserve_type_info ||
14182 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
14183 "unexpected BPF builtin");
14184
14185 // A sequence number, injected into IR builtin function calls, to prevent
14186 // CSE when the only difference between otherwise identical calls may be
14187 // the debuginfo metadata.
14188 static uint32_t BuiltinSeqNum;
14189
14190 switch (BuiltinID) {
14191 default:
14192 llvm_unreachable("Unexpected BPF builtin");
14193 case BPF::BI__builtin_preserve_field_info: {
14194 const Expr *Arg = E->getArg(0);
14195 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
14196
14197 if (!getDebugInfo()) {
14198 CGM.Error(E->getExprLoc(),
14199 "using __builtin_preserve_field_info() without -g");
14200 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14201 : EmitLValue(Arg).emitRawPointer(*this);
14202 }
14203
14204 // Enable underlying preserve_*_access_index() generation.
14205 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14206 IsInPreservedAIRegion = true;
14207 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14208 : EmitLValue(Arg).emitRawPointer(*this);
14209 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14210
14211 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14212 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14213
14214 // Build the IR for the preserve_field_info intrinsic.
14215 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14216 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14217 {FieldAddr->getType()});
14218 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14219 }
14220 case BPF::BI__builtin_btf_type_id:
14221 case BPF::BI__builtin_preserve_type_info: {
14222 if (!getDebugInfo()) {
14223 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14224 return nullptr;
14225 }
14226
14227 const Expr *Arg0 = E->getArg(0);
14228 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14229 Arg0->getType(), Arg0->getExprLoc());
14230
14231 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14232 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14233 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14234
14235 llvm::Function *FnDecl;
14236 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14237 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14238 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14239 else
14240 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14241 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14242 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14243 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14244 return Fn;
14245 }
14246 case BPF::BI__builtin_preserve_enum_value: {
14247 if (!getDebugInfo()) {
14248 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14249 return nullptr;
14250 }
14251
14252 const Expr *Arg0 = E->getArg(0);
14253 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14254 Arg0->getType(), Arg0->getExprLoc());
14255
14256 // Find enumerator
14257 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14258 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14259 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14260 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14261
14262 auto InitVal = Enumerator->getInitVal();
14263 std::string InitValStr;
14264 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14265 InitValStr = std::to_string(InitVal.getSExtValue());
14266 else
14267 InitValStr = std::to_string(InitVal.getZExtValue());
14268 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14269 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14270
14271 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14272 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14273 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14274
14275 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14276 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14277 CallInst *Fn =
14278 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14279 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14280 return Fn;
14281 }
14282 }
14283}
14284
14285llvm::Value *CodeGenFunction::
14286 BuildVector(ArrayRef<llvm::Value*> Ops) {
14287 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14288 "Not a power-of-two sized vector!");
14289 bool AllConstants = true;
14290 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14291 AllConstants &= isa<Constant>(Ops[i]);
14292
14293 // If this is a constant vector, create a ConstantVector.
14294 if (AllConstants) {
14295 SmallVector<llvm::Constant*, 16> CstOps;
14296 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14297 CstOps.push_back(cast<Constant>(Ops[i]));
14298 return llvm::ConstantVector::get(CstOps);
14299 }
14300
14301 // Otherwise, insertelement the values to build the vector.
14302 Value *Result = llvm::PoisonValue::get(
14303 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14304
14305 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14306 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14307
14308 return Result;
14309}
14310
14311// Convert the mask from an integer type to a vector of i1.
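// For example, an i8 mask used with NumElts == 4 is first bitcast to <8 x i1>
// and then shuffled down to <4 x i1>.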
14312 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14313 unsigned NumElts) {
14314
14315 auto *MaskTy = llvm::FixedVectorType::get(
14316 CGF.Builder.getInt1Ty(),
14317 cast<IntegerType>(Mask->getType())->getBitWidth());
14318 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14319
14320 // If we have fewer than 8 elements, then the starting mask was an i8 and
14321 // we need to extract down to the right number of elements.
14322 if (NumElts < 8) {
14323 int Indices[4];
14324 for (unsigned i = 0; i != NumElts; ++i)
14325 Indices[i] = i;
14326 MaskVec = CGF.Builder.CreateShuffleVector(
14327 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14328 }
14329 return MaskVec;
14330}
14331
14332 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14333 Align Alignment) {
14334 Value *Ptr = Ops[0];
14335
14336 Value *MaskVec = getMaskVecValue(
14337 CGF, Ops[2],
14338 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14339
14340 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14341}
14342
14343 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14344 Align Alignment) {
14345 llvm::Type *Ty = Ops[1]->getType();
14346 Value *Ptr = Ops[0];
14347
14348 Value *MaskVec = getMaskVecValue(
14349 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14350
14351 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14352}
14353
14354 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14355 ArrayRef<Value *> Ops) {
14356 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14357 Value *Ptr = Ops[0];
14358
14359 Value *MaskVec = getMaskVecValue(
14360 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14361
14362 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14363 ResultTy);
14364 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14365}
14366
14367 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14368 ArrayRef<Value *> Ops,
14369 bool IsCompress) {
14370 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14371
14372 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14373
14374 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14375 : Intrinsic::x86_avx512_mask_expand;
14376 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14377 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14378}
14379
14380 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14381 ArrayRef<Value *> Ops) {
14382 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14383 Value *Ptr = Ops[0];
14384
14385 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14386
14387 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14388 ResultTy);
14389 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14390}
14391
14392static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14393 ArrayRef<Value *> Ops,
14394 bool InvertLHS = false) {
14395 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14396 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14397 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14398
14399 if (InvertLHS)
14400 LHS = CGF.Builder.CreateNot(LHS);
14401
14402 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14403 Ops[0]->getType());
14404}
14405
14406 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14407 Value *Amt, bool IsRight) {
14408 llvm::Type *Ty = Op0->getType();
14409
14410 // The amount may be a scalar immediate, in which case create a splat vector.
14411 // Funnel shift amounts are treated as modulo the element width and the types
14412 // are all power-of-2, so we only care about the lowest log2 bits anyway.
14413 if (Amt->getType() != Ty) {
14414 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14415 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14416 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14417 }
14418
14419 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14420 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14421 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14422}
14423
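// Lowers the XOP vpcom/vpcomu builtins: the immediate selects an integer
// comparison predicate (or constant false/true), and the i1 comparison result
// is sign-extended back to the operand type.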
14424 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14425 bool IsSigned) {
14426 Value *Op0 = Ops[0];
14427 Value *Op1 = Ops[1];
14428 llvm::Type *Ty = Op0->getType();
14429 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14430
14431 CmpInst::Predicate Pred;
14432 switch (Imm) {
14433 case 0x0:
14434 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14435 break;
14436 case 0x1:
14437 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14438 break;
14439 case 0x2:
14440 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14441 break;
14442 case 0x3:
14443 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14444 break;
14445 case 0x4:
14446 Pred = ICmpInst::ICMP_EQ;
14447 break;
14448 case 0x5:
14449 Pred = ICmpInst::ICMP_NE;
14450 break;
14451 case 0x6:
14452 return llvm::Constant::getNullValue(Ty); // FALSE
14453 case 0x7:
14454 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14455 default:
14456 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14457 }
14458
14459 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14460 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14461 return Res;
14462}
14463
14464 static Value *EmitX86Select(CodeGenFunction &CGF,
14465 Value *Mask, Value *Op0, Value *Op1) {
14466
14467 // If the mask is all ones, just return the first argument.
14468 if (const auto *C = dyn_cast<Constant>(Mask))
14469 if (C->isAllOnesValue())
14470 return Op0;
14471
14472 Mask = getMaskVecValue(
14473 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14474
14475 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14476}
14477
14478 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14479 Value *Mask, Value *Op0, Value *Op1) {
14480 // If the mask is all ones, just return the first argument.
14481 if (const auto *C = dyn_cast<Constant>(Mask))
14482 if (C->isAllOnesValue())
14483 return Op0;
14484
14485 auto *MaskTy = llvm::FixedVectorType::get(
14486 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14487 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14488 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14489 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14490}
14491
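// When fewer than 8 elements are compared, the <N x i1> result is widened to 8
// lanes (the padding lanes come from the all-zero second shuffle operand) so it
// can be bitcast to at least an i8 mask.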
14492 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14493 unsigned NumElts, Value *MaskIn) {
14494 if (MaskIn) {
14495 const auto *C = dyn_cast<Constant>(MaskIn);
14496 if (!C || !C->isAllOnesValue())
14497 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14498 }
14499
14500 if (NumElts < 8) {
14501 int Indices[8];
14502 for (unsigned i = 0; i != NumElts; ++i)
14503 Indices[i] = i;
14504 for (unsigned i = NumElts; i != 8; ++i)
14505 Indices[i] = i % NumElts + NumElts;
14506 Cmp = CGF.Builder.CreateShuffleVector(
14507 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14508 }
14509
14510 return CGF.Builder.CreateBitCast(Cmp,
14511 IntegerType::get(CGF.getLLVMContext(),
14512 std::max(NumElts, 8U)));
14513}
14514
14515 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14516 bool Signed, ArrayRef<Value *> Ops) {
14517 assert((Ops.size() == 2 || Ops.size() == 4) &&
14518 "Unexpected number of arguments");
14519 unsigned NumElts =
14520 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14521 Value *Cmp;
14522
14523 if (CC == 3) {
14524 Cmp = Constant::getNullValue(
14525 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14526 } else if (CC == 7) {
14527 Cmp = Constant::getAllOnesValue(
14528 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14529 } else {
14530 ICmpInst::Predicate Pred;
14531 switch (CC) {
14532 default: llvm_unreachable("Unknown condition code");
14533 case 0: Pred = ICmpInst::ICMP_EQ; break;
14534 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14535 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14536 case 4: Pred = ICmpInst::ICMP_NE; break;
14537 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14538 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14539 }
14540 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14541 }
14542
14543 Value *MaskIn = nullptr;
14544 if (Ops.size() == 4)
14545 MaskIn = Ops[3];
14546
14547 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14548}
14549
14550 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14551 Value *Zero = Constant::getNullValue(In->getType());
14552 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14553}
14554
14555 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14556 ArrayRef<Value *> Ops, bool IsSigned) {
14557 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14558 llvm::Type *Ty = Ops[1]->getType();
14559
14560 Value *Res;
14561 if (Rnd != 4) {
14562 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14563 : Intrinsic::x86_avx512_uitofp_round;
14564 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14565 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14566 } else {
14567 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14568 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14569 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14570 }
14571
14572 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14573}
14574
14575// Lowers X86 FMA intrinsics to IR.
14576 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14577 ArrayRef<Value *> Ops, unsigned BuiltinID,
14578 bool IsAddSub) {
14579
14580 bool Subtract = false;
14581 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14582 switch (BuiltinID) {
14583 default: break;
14584 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14585 Subtract = true;
14586 [[fallthrough]];
14587 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14588 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14589 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14590 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14591 break;
14592 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14593 Subtract = true;
14594 [[fallthrough]];
14595 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14596 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14597 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14598 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14599 break;
14600 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14601 Subtract = true;
14602 [[fallthrough]];
14603 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14604 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14605 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14606 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14607 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14608 Subtract = true;
14609 [[fallthrough]];
14610 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14611 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14612 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14613 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14614 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14615 Subtract = true;
14616 [[fallthrough]];
14617 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14618 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14619 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14620 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14621 break;
14622 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14623 Subtract = true;
14624 [[fallthrough]];
14625 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14626 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14627 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14628 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14629 break;
14630 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14631 Subtract = true;
14632 LLVM_FALLTHROUGH;
14633 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14634 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14635 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14636 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14637 break;
14638 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14639 Subtract = true;
14640 LLVM_FALLTHROUGH;
14641 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14642 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14643 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14644 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14645 break;
14646 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14647 Subtract = true;
14648 LLVM_FALLTHROUGH;
14649 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14650 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14651 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14652 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14653 break;
14654 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14655 Subtract = true;
14656 LLVM_FALLTHROUGH;
14657 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14658 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14659 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14660 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14661 break;
14662 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14663 Subtract = true;
14664 LLVM_FALLTHROUGH;
14665 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14666 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14667 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14668 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14669 break;
14670 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14671 Subtract = true;
14672 LLVM_FALLTHROUGH;
14673 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14674 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14675 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14676 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14677 break;
14678 }
14679
14680 Value *A = Ops[0];
14681 Value *B = Ops[1];
14682 Value *C = Ops[2];
14683
14684 if (Subtract)
14685 C = CGF.Builder.CreateFNeg(C);
14686
14687 Value *Res;
14688
14689 // Use the target intrinsic for non-default rounding (!= _MM_FROUND_CUR_DIRECTION/4) or for the addsub forms.
14690 if (IID != Intrinsic::not_intrinsic &&
14691 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14692 IsAddSub)) {
14693 Function *Intr = CGF.CGM.getIntrinsic(IID);
14694 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14695 } else {
14696 llvm::Type *Ty = A->getType();
14697 Function *FMA;
14698 if (CGF.Builder.getIsFPConstrained()) {
14699 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14700 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14701 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14702 } else {
14703 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14704 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14705 }
14706 }
14707
14708 // Handle any required masking.
14709 Value *MaskFalseVal = nullptr;
14710 switch (BuiltinID) {
14711 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14712 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14713 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14714 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14715 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14716 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14717 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14718 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14719 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14720 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14721 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14722 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14723 MaskFalseVal = Ops[0];
14724 break;
14725 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14726 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14727 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14728 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14729 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14730 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14731 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14732 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14733 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14734 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14735 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14736 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14737 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14738 break;
14739 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14740 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14741 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14742 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14743 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14744 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14745 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14746 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14747 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14748 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14749 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14750 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14751 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14752 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14753 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14754 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14755 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14756 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14757 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14758 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14759 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14760 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14761 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14762 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14763 MaskFalseVal = Ops[2];
14764 break;
14765 }
14766
14767 if (MaskFalseVal)
14768 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14769
14770 return Res;
14771}
14772
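// Lowers the scalar (ss/sd/sh) FMA builtins: only element 0 of each operand is
// used, the result is optionally masked, and it is then re-inserted into
// element 0 of Upper.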
14773 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14774 MutableArrayRef<Value *> Ops, Value *Upper,
14775 bool ZeroMask = false, unsigned PTIdx = 0,
14776 bool NegAcc = false) {
14777 unsigned Rnd = 4;
14778 if (Ops.size() > 4)
14779 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14780
14781 if (NegAcc)
14782 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14783
14784 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14785 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14786 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14787 Value *Res;
14788 if (Rnd != 4) {
14789 Intrinsic::ID IID;
14790
14791 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14792 case 16:
14793 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14794 break;
14795 case 32:
14796 IID = Intrinsic::x86_avx512_vfmadd_f32;
14797 break;
14798 case 64:
14799 IID = Intrinsic::x86_avx512_vfmadd_f64;
14800 break;
14801 default:
14802 llvm_unreachable("Unexpected size");
14803 }
14804 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14805 {Ops[0], Ops[1], Ops[2], Ops[4]});
14806 } else if (CGF.Builder.getIsFPConstrained()) {
14807 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14808 Function *FMA = CGF.CGM.getIntrinsic(
14809 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14810 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14811 } else {
14812 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14813 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14814 }
14815 // If we have more than 3 arguments, we need to do masking.
14816 if (Ops.size() > 3) {
14817 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14818 : Ops[PTIdx];
14819
14820 // If we negated the accumulator and it is also the PassThru value, we need
14821 // to bypass the negate. Conveniently, Upper should be the same value in
14822 // this case.
14823 if (NegAcc && PTIdx == 2)
14824 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14825
14826 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14827 }
14828 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14829}
14830
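// Lowers pmuldq/pmuludq: the vXi32 operands are reinterpreted as vXi64 and
// sign- or zero-extended in place before a full 64-bit multiply. Roughly, for
// the unsigned <4 x i32> case (pseudo-IR sketch):
//   %l = and <2 x i64> (bitcast %a), <0xffffffff, 0xffffffff>
//   %r = and <2 x i64> (bitcast %b), <0xffffffff, 0xffffffff>
//   %res = mul <2 x i64> %l, %r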
14831static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14832 ArrayRef<Value *> Ops) {
14833 llvm::Type *Ty = Ops[0]->getType();
14834 // Arguments have a vXi32 type so cast to vXi64.
14835 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14836 Ty->getPrimitiveSizeInBits() / 64);
14837 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14838 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14839
14840 if (IsSigned) {
14841 // Shift left then arithmetic shift right.
14842 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14843 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14844 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14845 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14846 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14847 } else {
14848 // Clear the upper bits.
14849 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14850 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14851 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14852 }
14853
14854 return CGF.Builder.CreateMul(LHS, RHS);
14855}
14856
14857 // Emit a masked pternlog intrinsic. This helper only exists because the header
14858 // has to use a macro, and we cannot pass the input argument to both a pternlog
14859 // builtin and a select builtin without evaluating it twice.
14860static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14861 ArrayRef<Value *> Ops) {
14862 llvm::Type *Ty = Ops[0]->getType();
14863
14864 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14865 unsigned EltWidth = Ty->getScalarSizeInBits();
14866 Intrinsic::ID IID;
14867 if (VecWidth == 128 && EltWidth == 32)
14868 IID = Intrinsic::x86_avx512_pternlog_d_128;
14869 else if (VecWidth == 256 && EltWidth == 32)
14870 IID = Intrinsic::x86_avx512_pternlog_d_256;
14871 else if (VecWidth == 512 && EltWidth == 32)
14872 IID = Intrinsic::x86_avx512_pternlog_d_512;
14873 else if (VecWidth == 128 && EltWidth == 64)
14874 IID = Intrinsic::x86_avx512_pternlog_q_128;
14875 else if (VecWidth == 256 && EltWidth == 64)
14876 IID = Intrinsic::x86_avx512_pternlog_q_256;
14877 else if (VecWidth == 512 && EltWidth == 64)
14878 IID = Intrinsic::x86_avx512_pternlog_q_512;
14879 else
14880 llvm_unreachable("Unexpected intrinsic");
14881
14882 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14883 Ops.drop_back());
14884 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14885 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14886}
14887
14889 llvm::Type *DstTy) {
14890 unsigned NumberOfElements =
14891 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14892 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14893 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14894}
14895
14896Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14897 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14898 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14899 return EmitX86CpuIs(CPUStr);
14900}
14901
14902 // Convert F16 halves to floats.
14903 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14904 ArrayRef<Value *> Ops,
14905 llvm::Type *DstTy) {
14906 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14907 "Unknown cvtph2ps intrinsic");
14908
14909 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14910 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14911 Function *F =
14912 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14913 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14914 }
14915
14916 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14917 Value *Src = Ops[0];
14918
14919 // Extract the subvector.
14920 if (NumDstElts !=
14921 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14922 assert(NumDstElts == 4 && "Unexpected vector size");
14923 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14924 }
14925
14926 // Bitcast from vXi16 to vXf16.
14927 auto *HalfTy = llvm::FixedVectorType::get(
14928 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14929 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14930
14931 // Perform the fp-extension.
14932 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14933
14934 if (Ops.size() >= 3)
14935 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14936 return Res;
14937}
14938
14939Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14940
14941 llvm::Type *Int32Ty = Builder.getInt32Ty();
14942
14943 // Matching the struct layout from the compiler-rt/libgcc structure that is
14944 // filled in:
14945 // unsigned int __cpu_vendor;
14946 // unsigned int __cpu_type;
14947 // unsigned int __cpu_subtype;
14948 // unsigned int __cpu_features[1];
14949 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14950 llvm::ArrayType::get(Int32Ty, 1));
14951
14952 // Grab the global __cpu_model.
14953 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14954 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14955
14956 // Calculate the index needed to access the correct field based on the
14957 // range. Also adjust the expected value.
14958 unsigned Index;
14959 unsigned Value;
14960 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14961#define X86_VENDOR(ENUM, STRING) \
14962 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14963#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14964 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14965#define X86_CPU_TYPE(ENUM, STR) \
14966 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14967#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14968 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14969#define X86_CPU_SUBTYPE(ENUM, STR) \
14970 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14971#include "llvm/TargetParser/X86TargetParser.def"
14972 .Default({0, 0});
14973 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14974
14975 // Grab the appropriate field from __cpu_model.
14976 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14977 ConstantInt::get(Int32Ty, Index)};
14978 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14979 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14980 CharUnits::fromQuantity(4));
14981
14982 // Check the value of the field against the requested value.
14983 return Builder.CreateICmpEQ(CpuValue,
14984 llvm::ConstantInt::get(Int32Ty, Value));
14985}
14986
14987Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14988 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14989 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14990 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14991 return Builder.getFalse();
14992 return EmitX86CpuSupports(FeatureStr);
14993}
14994
14995Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14996 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14997}
14998
14999llvm::Value *
15000CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
15001 Value *Result = Builder.getTrue();
15002 if (FeatureMask[0] != 0) {
15003 // Matching the struct layout from the compiler-rt/libgcc structure that is
15004 // filled in:
15005 // unsigned int __cpu_vendor;
15006 // unsigned int __cpu_type;
15007 // unsigned int __cpu_subtype;
15008 // unsigned int __cpu_features[1];
15009 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
15010 llvm::ArrayType::get(Int32Ty, 1));
15011
15012 // Grab the global __cpu_model.
15013 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
15014 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
15015
15016 // Grab the first (0th) element of the __cpu_features field from the
15017 // __cpu_model global of struct type STy.
15018 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
15019 Builder.getInt32(0)};
15020 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
15021 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
15022 CharUnits::fromQuantity(4));
15023
15024 // Check the value of the bit corresponding to the feature requested.
15025 Value *Mask = Builder.getInt32(FeatureMask[0]);
15026 Value *Bitset = Builder.CreateAnd(Features, Mask);
15027 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
15028 Result = Builder.CreateAnd(Result, Cmp);
15029 }
15030
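// Feature bits beyond the first 32 live in the separate __cpu_features2 array;
// each requested 32-bit chunk is tested the same way.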
15031 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
15032 llvm::Constant *CpuFeatures2 =
15033 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
15034 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
15035 for (int i = 1; i != 4; ++i) {
15036 const uint32_t M = FeatureMask[i];
15037 if (!M)
15038 continue;
15039 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
15040 Value *Features = Builder.CreateAlignedLoad(
15041 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
15042 CharUnits::fromQuantity(4));
15043 // Check the value of the bit corresponding to the feature requested.
15044 Value *Mask = Builder.getInt32(M);
15045 Value *Bitset = Builder.CreateAnd(Features, Mask);
15046 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
15047 Result = Builder.CreateAnd(Result, Cmp);
15048 }
15049
15050 return Result;
15051}
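  // Rough illustration (feature name assumed): for __builtin_cpu_supports("avx2")
  // exactly one of the four 32-bit feature words carries the requested bit, and
  // the check above reduces to
  //   %bits = load i32, ptr <field of __cpu_model or __cpu_features2>, align 4
  //   %and  = and i32 %bits, Mask
  //   %cmp  = icmp eq i32 %and, Mask
  // with the per-word comparisons combined by 'and' into Result.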
15052
15053Value *CodeGenFunction::EmitAArch64CpuInit() {
15054 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
15055 llvm::FunctionCallee Func =
15056 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
15057 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
15058 cast<llvm::GlobalValue>(Func.getCallee())
15059 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15060 return Builder.CreateCall(Func);
15061}
15062
15063Value *CodeGenFunction::EmitRISCVCpuInit() {
15064 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
15065 llvm::FunctionCallee Func =
15066 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
15067 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
15068 CalleeGV->setDSOLocal(true);
15069 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15070 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
15071}
15072
15073Value *CodeGenFunction::EmitX86CpuInit() {
15074 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
15075 /*Variadic*/ false);
15076 llvm::FunctionCallee Func =
15077 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
15078 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
15079 cast<llvm::GlobalValue>(Func.getCallee())
15080 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15081 return Builder.CreateCall(Func);
15082}
15083
15084Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
15085 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
15086 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
15087 llvm::SmallVector<StringRef, 8> Features;
15088 ArgStr.split(Features, "+");
15089 for (auto &Feature : Features) {
15090 Feature = Feature.trim();
15091 if (!llvm::AArch64::parseFMVExtension(Feature))
15092 return Builder.getFalse();
15093 if (Feature != "default")
15094 Features.push_back(Feature);
15095 }
15096 return EmitAArch64CpuSupports(Features);
15097}
15098
15099llvm::Value *
15100CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15101 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
15102 Value *Result = Builder.getTrue();
15103 if (FeaturesMask != 0) {
15104 // Get the features from the structure in the runtime library:
15105 // struct {
15106 // unsigned long long features;
15107 // } __aarch64_cpu_features;
15108 llvm::Type *STy = llvm::StructType::get(Int64Ty);
15109 llvm::Constant *AArch64CPUFeatures =
15110 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
15111 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
15112 llvm::Value *CpuFeatures = Builder.CreateGEP(
15113 STy, AArch64CPUFeatures,
15114 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
15115 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
15116 CharUnits::fromQuantity(8));
15117 Value *Mask = Builder.getInt64(FeaturesMask);
15118 Value *Bitset = Builder.CreateAnd(Features, Mask);
15119 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
15120 Result = Builder.CreateAnd(Result, Cmp);
15121 }
15122 return Result;
15123}
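  // Minimal sketch of the emitted check (mask value left symbolic):
  //   %feat = load i64, ptr @__aarch64_cpu_features, align 8
  //   %and  = and i64 %feat, FeaturesMask
  //   %res  = icmp eq i64 %and, FeaturesMask
  // i.e. every requested FMV feature bit must be set for the result to be true.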
15124
15125Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
15126
15127 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
15128 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
15129 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
15130 return Builder.getFalse();
15131
15132 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
15133}
15134
15135static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
15136 CodeGenModule &CGM) {
15137 llvm::Type *Int32Ty = Builder.getInt32Ty();
15138 llvm::Type *Int64Ty = Builder.getInt64Ty();
15139 llvm::ArrayType *ArrayOfInt64Ty =
15140 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
15141 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
15142 llvm::Constant *RISCVFeaturesBits =
15143 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
15144 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
15145 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
15146 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
15147 IndexVal};
15148 Value *Ptr =
15149 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
15150 Value *FeaturesBit =
15151 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
15152 return FeaturesBit;
15153}
15154
15155Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15156 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
15157 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
15158
15159 for (auto Feat : FeaturesStrs) {
15160 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
15161
15162 // If there isn't a BitPos for this feature, skip this version.
15163 // A warning is also reported to the user during compilation.
15164 if (BitPos == -1)
15165 return Builder.getFalse();
15166
15167 RequireBitMasks[GroupID] |= (1ULL << BitPos);
15168 }
15169
15170 Value *Result = nullptr;
15171 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
15172 if (RequireBitMasks[Idx] == 0)
15173 continue;
15174
15175 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
15176 Value *Bitset =
15177 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
15178 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
15179 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
15180 }
15181
15182 assert(Result && "Should have value here.");
15183
15184 return Result;
15185}
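  // Sketch of the result (indices left symbolic): each nonzero RequireBitMasks[Idx]
  // contributes one "load group Idx, and with mask, icmp eq mask" sequence, and the
  // per-group comparisons are combined with 'and', so every requested RISC-V feature
  // bit recorded in __riscv_feature_bits must be present.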
15186
15187Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
15188 const CallExpr *E) {
15189 if (BuiltinID == Builtin::BI__builtin_cpu_is)
15190 return EmitX86CpuIs(E);
15191 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
15192 return EmitX86CpuSupports(E);
15193 if (BuiltinID == Builtin::BI__builtin_cpu_init)
15194 return EmitX86CpuInit();
15195
15196 // Handle MSVC intrinsics before argument evaluation to prevent double
15197 // evaluation.
15198 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15199 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15200
15201 SmallVector<Value*, 4> Ops;
15202 bool IsMaskFCmp = false;
15203 bool IsConjFMA = false;
15204
15205 // Find out if any arguments are required to be integer constant expressions.
15206 unsigned ICEArguments = 0;
15207 ASTContext::GetBuiltinTypeError Error;
15208 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15209 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15210
15211 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15212 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15213 }
15214
15215 // These exist so that the builtin that takes an immediate can be bounds
15216 // checked by clang to avoid passing bad immediates to the backend. Since
15217 // AVX has a larger immediate than SSE we would need separate builtins to
15218 // do the different bounds checking. Rather than create a clang-specific
15219 // SSE-only builtin, this implements eight separate builtins to match the
15220 // gcc implementation.
15221 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15222 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15223 llvm::Function *F = CGM.getIntrinsic(ID);
15224 return Builder.CreateCall(F, Ops);
15225 };
15226
15227 // For the vector forms of FP comparisons, translate the builtins directly to
15228 // IR.
15229 // TODO: The builtins could be removed if the SSE header files used vector
15230 // extension comparisons directly (vector ordered/unordered may need
15231 // additional support via __builtin_isnan()).
15232 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15233 bool IsSignaling) {
15234 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15235 Value *Cmp;
15236 if (IsSignaling)
15237 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15238 else
15239 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15240 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15241 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15242 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15243 return Builder.CreateBitCast(Sext, FPVecTy);
15244 };
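  // Rough sketch of getVectorFCmpIR's output (operand types assumed, e.g. <4 x float>):
  //   %cmp  = fcmp oeq <4 x float> %a, %b   ; CreateFCmpS is used instead for signaling predicates
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>
  // so each result lane is all-ones on a match and all-zeros otherwise.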
15245
15246 switch (BuiltinID) {
15247 default: return nullptr;
15248 case X86::BI_mm_prefetch: {
15249 Value *Address = Ops[0];
15250 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15251 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15252 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15253 Value *Data = ConstantInt::get(Int32Ty, 1);
15254 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15255 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15256 }
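  // Worked example (hint value assumed): _MM_HINT_T0 is 3, so RW = (3 >> 2) & 1 = 0
  // (a read prefetch) and Locality = 3 & 3 = 3, producing roughly
  //   call void @llvm.prefetch.p0(ptr %p, i32 0, i32 3, i32 1)
  // where the trailing 1 selects the data cache.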
15257 case X86::BI_mm_clflush: {
15258 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15259 Ops[0]);
15260 }
15261 case X86::BI_mm_lfence: {
15262 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15263 }
15264 case X86::BI_mm_mfence: {
15265 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15266 }
15267 case X86::BI_mm_sfence: {
15268 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15269 }
15270 case X86::BI_mm_pause: {
15271 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15272 }
15273 case X86::BI__rdtsc: {
15274 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15275 }
15276 case X86::BI__builtin_ia32_rdtscp: {
15277 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15278 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15279 Ops[0]);
15280 return Builder.CreateExtractValue(Call, 0);
15281 }
15282 case X86::BI__builtin_ia32_lzcnt_u16:
15283 case X86::BI__builtin_ia32_lzcnt_u32:
15284 case X86::BI__builtin_ia32_lzcnt_u64: {
15285 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15286 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15287 }
15288 case X86::BI__builtin_ia32_tzcnt_u16:
15289 case X86::BI__builtin_ia32_tzcnt_u32:
15290 case X86::BI__builtin_ia32_tzcnt_u64: {
15291 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15292 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15293 }
15294 case X86::BI__builtin_ia32_undef128:
15295 case X86::BI__builtin_ia32_undef256:
15296 case X86::BI__builtin_ia32_undef512:
15297 // The x86 definition of "undef" is not the same as the LLVM definition
15298 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15299 // IR optimizer and backend.
15300 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15301 // value, we should use that here instead of a zero.
15302 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15303 case X86::BI__builtin_ia32_vec_ext_v4hi:
15304 case X86::BI__builtin_ia32_vec_ext_v16qi:
15305 case X86::BI__builtin_ia32_vec_ext_v8hi:
15306 case X86::BI__builtin_ia32_vec_ext_v4si:
15307 case X86::BI__builtin_ia32_vec_ext_v4sf:
15308 case X86::BI__builtin_ia32_vec_ext_v2di:
15309 case X86::BI__builtin_ia32_vec_ext_v32qi:
15310 case X86::BI__builtin_ia32_vec_ext_v16hi:
15311 case X86::BI__builtin_ia32_vec_ext_v8si:
15312 case X86::BI__builtin_ia32_vec_ext_v4di: {
15313 unsigned NumElts =
15314 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15315 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15316 Index &= NumElts - 1;
15317 // These builtins exist so we can ensure the index is an ICE and in range.
15318 // Otherwise we could just do this in the header file.
15319 return Builder.CreateExtractElement(Ops[0], Index);
15320 }
15321 case X86::BI__builtin_ia32_vec_set_v4hi:
15322 case X86::BI__builtin_ia32_vec_set_v16qi:
15323 case X86::BI__builtin_ia32_vec_set_v8hi:
15324 case X86::BI__builtin_ia32_vec_set_v4si:
15325 case X86::BI__builtin_ia32_vec_set_v2di:
15326 case X86::BI__builtin_ia32_vec_set_v32qi:
15327 case X86::BI__builtin_ia32_vec_set_v16hi:
15328 case X86::BI__builtin_ia32_vec_set_v8si:
15329 case X86::BI__builtin_ia32_vec_set_v4di: {
15330 unsigned NumElts =
15331 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15332 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15333 Index &= NumElts - 1;
15334 // These builtins exist so we can ensure the index is an ICE and in range.
15335 // Otherwise we could just do this in the header file.
15336 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15337 }
15338 case X86::BI_mm_setcsr:
15339 case X86::BI__builtin_ia32_ldmxcsr: {
15340 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15341 Builder.CreateStore(Ops[0], Tmp);
15342 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15343 Tmp.getPointer());
15344 }
15345 case X86::BI_mm_getcsr:
15346 case X86::BI__builtin_ia32_stmxcsr: {
15347 RawAddress Tmp = CreateMemTemp(E->getType());
15348 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15349 Tmp.getPointer());
15350 return Builder.CreateLoad(Tmp, "stmxcsr");
15351 }
15352 case X86::BI__builtin_ia32_xsave:
15353 case X86::BI__builtin_ia32_xsave64:
15354 case X86::BI__builtin_ia32_xrstor:
15355 case X86::BI__builtin_ia32_xrstor64:
15356 case X86::BI__builtin_ia32_xsaveopt:
15357 case X86::BI__builtin_ia32_xsaveopt64:
15358 case X86::BI__builtin_ia32_xrstors:
15359 case X86::BI__builtin_ia32_xrstors64:
15360 case X86::BI__builtin_ia32_xsavec:
15361 case X86::BI__builtin_ia32_xsavec64:
15362 case X86::BI__builtin_ia32_xsaves:
15363 case X86::BI__builtin_ia32_xsaves64:
15364 case X86::BI__builtin_ia32_xsetbv:
15365 case X86::BI_xsetbv: {
15366 Intrinsic::ID ID;
15367#define INTRINSIC_X86_XSAVE_ID(NAME) \
15368 case X86::BI__builtin_ia32_##NAME: \
15369 ID = Intrinsic::x86_##NAME; \
15370 break
15371 switch (BuiltinID) {
15372 default: llvm_unreachable("Unsupported intrinsic!");
15373 INTRINSIC_X86_XSAVE_ID(xsave);
15374 INTRINSIC_X86_XSAVE_ID(xsave64);
15375 INTRINSIC_X86_XSAVE_ID(xrstor);
15376 INTRINSIC_X86_XSAVE_ID(xrstor64);
15377 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15378 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15379 INTRINSIC_X86_XSAVE_ID(xrstors);
15380 INTRINSIC_X86_XSAVE_ID(xrstors64);
15381 INTRINSIC_X86_XSAVE_ID(xsavec);
15382 INTRINSIC_X86_XSAVE_ID(xsavec64);
15383 INTRINSIC_X86_XSAVE_ID(xsaves);
15384 INTRINSIC_X86_XSAVE_ID(xsaves64);
15385 INTRINSIC_X86_XSAVE_ID(xsetbv);
15386 case X86::BI_xsetbv:
15387 ID = Intrinsic::x86_xsetbv;
15388 break;
15389 }
15390#undef INTRINSIC_X86_XSAVE_ID
15391 Value *Mhi = Builder.CreateTrunc(
15392 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15393 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15394 Ops[1] = Mhi;
15395 Ops.push_back(Mlo);
15396 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15397 }
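  // Sketch of the operand split (mask value assumed): a 64-bit mask such as
  // 0x0000000700000003 is passed to the intrinsic as two i32 halves,
  // Mhi = 0x00000007 (bits 63:32) and Mlo = 0x00000003 (bits 31:0), matching
  // the EDX:EAX convention of the underlying instructions.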
15398 case X86::BI__builtin_ia32_xgetbv:
15399 case X86::BI_xgetbv:
15400 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15401 case X86::BI__builtin_ia32_storedqudi128_mask:
15402 case X86::BI__builtin_ia32_storedqusi128_mask:
15403 case X86::BI__builtin_ia32_storedquhi128_mask:
15404 case X86::BI__builtin_ia32_storedquqi128_mask:
15405 case X86::BI__builtin_ia32_storeupd128_mask:
15406 case X86::BI__builtin_ia32_storeups128_mask:
15407 case X86::BI__builtin_ia32_storedqudi256_mask:
15408 case X86::BI__builtin_ia32_storedqusi256_mask:
15409 case X86::BI__builtin_ia32_storedquhi256_mask:
15410 case X86::BI__builtin_ia32_storedquqi256_mask:
15411 case X86::BI__builtin_ia32_storeupd256_mask:
15412 case X86::BI__builtin_ia32_storeups256_mask:
15413 case X86::BI__builtin_ia32_storedqudi512_mask:
15414 case X86::BI__builtin_ia32_storedqusi512_mask:
15415 case X86::BI__builtin_ia32_storedquhi512_mask:
15416 case X86::BI__builtin_ia32_storedquqi512_mask:
15417 case X86::BI__builtin_ia32_storeupd512_mask:
15418 case X86::BI__builtin_ia32_storeups512_mask:
15419 return EmitX86MaskedStore(*this, Ops, Align(1));
15420
15421 case X86::BI__builtin_ia32_storesbf16128_mask:
15422 case X86::BI__builtin_ia32_storesh128_mask:
15423 case X86::BI__builtin_ia32_storess128_mask:
15424 case X86::BI__builtin_ia32_storesd128_mask:
15425 return EmitX86MaskedStore(*this, Ops, Align(1));
15426
15427 case X86::BI__builtin_ia32_cvtmask2b128:
15428 case X86::BI__builtin_ia32_cvtmask2b256:
15429 case X86::BI__builtin_ia32_cvtmask2b512:
15430 case X86::BI__builtin_ia32_cvtmask2w128:
15431 case X86::BI__builtin_ia32_cvtmask2w256:
15432 case X86::BI__builtin_ia32_cvtmask2w512:
15433 case X86::BI__builtin_ia32_cvtmask2d128:
15434 case X86::BI__builtin_ia32_cvtmask2d256:
15435 case X86::BI__builtin_ia32_cvtmask2d512:
15436 case X86::BI__builtin_ia32_cvtmask2q128:
15437 case X86::BI__builtin_ia32_cvtmask2q256:
15438 case X86::BI__builtin_ia32_cvtmask2q512:
15439 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15440
15441 case X86::BI__builtin_ia32_cvtb2mask128:
15442 case X86::BI__builtin_ia32_cvtb2mask256:
15443 case X86::BI__builtin_ia32_cvtb2mask512:
15444 case X86::BI__builtin_ia32_cvtw2mask128:
15445 case X86::BI__builtin_ia32_cvtw2mask256:
15446 case X86::BI__builtin_ia32_cvtw2mask512:
15447 case X86::BI__builtin_ia32_cvtd2mask128:
15448 case X86::BI__builtin_ia32_cvtd2mask256:
15449 case X86::BI__builtin_ia32_cvtd2mask512:
15450 case X86::BI__builtin_ia32_cvtq2mask128:
15451 case X86::BI__builtin_ia32_cvtq2mask256:
15452 case X86::BI__builtin_ia32_cvtq2mask512:
15453 return EmitX86ConvertToMask(*this, Ops[0]);
15454
15455 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15456 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15457 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15458 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15459 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15460 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15461 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15462 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15463 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15464 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15465 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15466 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15467 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15468 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15469 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15470 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15471 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15472 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15473 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15474 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15475 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15476 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15477 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15478 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15479 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15480 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15481
15482 case X86::BI__builtin_ia32_vfmaddss3:
15483 case X86::BI__builtin_ia32_vfmaddsd3:
15484 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15485 case X86::BI__builtin_ia32_vfmaddss3_mask:
15486 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15487 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15488 case X86::BI__builtin_ia32_vfmaddss:
15489 case X86::BI__builtin_ia32_vfmaddsd:
15490 return EmitScalarFMAExpr(*this, E, Ops,
15491 Constant::getNullValue(Ops[0]->getType()));
15492 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15493 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15494 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15495 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15496 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15497 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15498 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15499 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15500 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15501 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15502 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15503 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15504 /*NegAcc*/ true);
15505 case X86::BI__builtin_ia32_vfmaddph:
15506 case X86::BI__builtin_ia32_vfmaddps:
15507 case X86::BI__builtin_ia32_vfmaddpd:
15508 case X86::BI__builtin_ia32_vfmaddph256:
15509 case X86::BI__builtin_ia32_vfmaddps256:
15510 case X86::BI__builtin_ia32_vfmaddpd256:
15511 case X86::BI__builtin_ia32_vfmaddph512_mask:
15512 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15513 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15514 case X86::BI__builtin_ia32_vfmaddnepbh128:
15515 case X86::BI__builtin_ia32_vfmaddnepbh256:
15516 case X86::BI__builtin_ia32_vfmaddnepbh512:
15517 case X86::BI__builtin_ia32_vfmaddps512_mask:
15518 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15519 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15520 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15521 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15522 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15523 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15524 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15525 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15526 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15527 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15528 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15529 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15530 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15531 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15532 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15533 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15534 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15535 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15536 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15537 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15538 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15539 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15540 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15541 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15542 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15543 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15544 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15545 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15546 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15547 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15548 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15549 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15550 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15551 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15552 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15553 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15554 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15555 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15556 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15557 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15558 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15559 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15560 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15561 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15562 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15563 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15564
15565 case X86::BI__builtin_ia32_movdqa32store128_mask:
15566 case X86::BI__builtin_ia32_movdqa64store128_mask:
15567 case X86::BI__builtin_ia32_storeaps128_mask:
15568 case X86::BI__builtin_ia32_storeapd128_mask:
15569 case X86::BI__builtin_ia32_movdqa32store256_mask:
15570 case X86::BI__builtin_ia32_movdqa64store256_mask:
15571 case X86::BI__builtin_ia32_storeaps256_mask:
15572 case X86::BI__builtin_ia32_storeapd256_mask:
15573 case X86::BI__builtin_ia32_movdqa32store512_mask:
15574 case X86::BI__builtin_ia32_movdqa64store512_mask:
15575 case X86::BI__builtin_ia32_storeaps512_mask:
15576 case X86::BI__builtin_ia32_storeapd512_mask:
15577 return EmitX86MaskedStore(
15578 *this, Ops,
15579 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15580
15581 case X86::BI__builtin_ia32_loadups128_mask:
15582 case X86::BI__builtin_ia32_loadups256_mask:
15583 case X86::BI__builtin_ia32_loadups512_mask:
15584 case X86::BI__builtin_ia32_loadupd128_mask:
15585 case X86::BI__builtin_ia32_loadupd256_mask:
15586 case X86::BI__builtin_ia32_loadupd512_mask:
15587 case X86::BI__builtin_ia32_loaddquqi128_mask:
15588 case X86::BI__builtin_ia32_loaddquqi256_mask:
15589 case X86::BI__builtin_ia32_loaddquqi512_mask:
15590 case X86::BI__builtin_ia32_loaddquhi128_mask:
15591 case X86::BI__builtin_ia32_loaddquhi256_mask:
15592 case X86::BI__builtin_ia32_loaddquhi512_mask:
15593 case X86::BI__builtin_ia32_loaddqusi128_mask:
15594 case X86::BI__builtin_ia32_loaddqusi256_mask:
15595 case X86::BI__builtin_ia32_loaddqusi512_mask:
15596 case X86::BI__builtin_ia32_loaddqudi128_mask:
15597 case X86::BI__builtin_ia32_loaddqudi256_mask:
15598 case X86::BI__builtin_ia32_loaddqudi512_mask:
15599 return EmitX86MaskedLoad(*this, Ops, Align(1));
15600
15601 case X86::BI__builtin_ia32_loadsbf16128_mask:
15602 case X86::BI__builtin_ia32_loadsh128_mask:
15603 case X86::BI__builtin_ia32_loadss128_mask:
15604 case X86::BI__builtin_ia32_loadsd128_mask:
15605 return EmitX86MaskedLoad(*this, Ops, Align(1));
15606
15607 case X86::BI__builtin_ia32_loadaps128_mask:
15608 case X86::BI__builtin_ia32_loadaps256_mask:
15609 case X86::BI__builtin_ia32_loadaps512_mask:
15610 case X86::BI__builtin_ia32_loadapd128_mask:
15611 case X86::BI__builtin_ia32_loadapd256_mask:
15612 case X86::BI__builtin_ia32_loadapd512_mask:
15613 case X86::BI__builtin_ia32_movdqa32load128_mask:
15614 case X86::BI__builtin_ia32_movdqa32load256_mask:
15615 case X86::BI__builtin_ia32_movdqa32load512_mask:
15616 case X86::BI__builtin_ia32_movdqa64load128_mask:
15617 case X86::BI__builtin_ia32_movdqa64load256_mask:
15618 case X86::BI__builtin_ia32_movdqa64load512_mask:
15619 return EmitX86MaskedLoad(
15620 *this, Ops,
15621 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15622
15623 case X86::BI__builtin_ia32_expandloaddf128_mask:
15624 case X86::BI__builtin_ia32_expandloaddf256_mask:
15625 case X86::BI__builtin_ia32_expandloaddf512_mask:
15626 case X86::BI__builtin_ia32_expandloadsf128_mask:
15627 case X86::BI__builtin_ia32_expandloadsf256_mask:
15628 case X86::BI__builtin_ia32_expandloadsf512_mask:
15629 case X86::BI__builtin_ia32_expandloaddi128_mask:
15630 case X86::BI__builtin_ia32_expandloaddi256_mask:
15631 case X86::BI__builtin_ia32_expandloaddi512_mask:
15632 case X86::BI__builtin_ia32_expandloadsi128_mask:
15633 case X86::BI__builtin_ia32_expandloadsi256_mask:
15634 case X86::BI__builtin_ia32_expandloadsi512_mask:
15635 case X86::BI__builtin_ia32_expandloadhi128_mask:
15636 case X86::BI__builtin_ia32_expandloadhi256_mask:
15637 case X86::BI__builtin_ia32_expandloadhi512_mask:
15638 case X86::BI__builtin_ia32_expandloadqi128_mask:
15639 case X86::BI__builtin_ia32_expandloadqi256_mask:
15640 case X86::BI__builtin_ia32_expandloadqi512_mask:
15641 return EmitX86ExpandLoad(*this, Ops);
15642
15643 case X86::BI__builtin_ia32_compressstoredf128_mask:
15644 case X86::BI__builtin_ia32_compressstoredf256_mask:
15645 case X86::BI__builtin_ia32_compressstoredf512_mask:
15646 case X86::BI__builtin_ia32_compressstoresf128_mask:
15647 case X86::BI__builtin_ia32_compressstoresf256_mask:
15648 case X86::BI__builtin_ia32_compressstoresf512_mask:
15649 case X86::BI__builtin_ia32_compressstoredi128_mask:
15650 case X86::BI__builtin_ia32_compressstoredi256_mask:
15651 case X86::BI__builtin_ia32_compressstoredi512_mask:
15652 case X86::BI__builtin_ia32_compressstoresi128_mask:
15653 case X86::BI__builtin_ia32_compressstoresi256_mask:
15654 case X86::BI__builtin_ia32_compressstoresi512_mask:
15655 case X86::BI__builtin_ia32_compressstorehi128_mask:
15656 case X86::BI__builtin_ia32_compressstorehi256_mask:
15657 case X86::BI__builtin_ia32_compressstorehi512_mask:
15658 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15659 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15660 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15661 return EmitX86CompressStore(*this, Ops);
15662
15663 case X86::BI__builtin_ia32_expanddf128_mask:
15664 case X86::BI__builtin_ia32_expanddf256_mask:
15665 case X86::BI__builtin_ia32_expanddf512_mask:
15666 case X86::BI__builtin_ia32_expandsf128_mask:
15667 case X86::BI__builtin_ia32_expandsf256_mask:
15668 case X86::BI__builtin_ia32_expandsf512_mask:
15669 case X86::BI__builtin_ia32_expanddi128_mask:
15670 case X86::BI__builtin_ia32_expanddi256_mask:
15671 case X86::BI__builtin_ia32_expanddi512_mask:
15672 case X86::BI__builtin_ia32_expandsi128_mask:
15673 case X86::BI__builtin_ia32_expandsi256_mask:
15674 case X86::BI__builtin_ia32_expandsi512_mask:
15675 case X86::BI__builtin_ia32_expandhi128_mask:
15676 case X86::BI__builtin_ia32_expandhi256_mask:
15677 case X86::BI__builtin_ia32_expandhi512_mask:
15678 case X86::BI__builtin_ia32_expandqi128_mask:
15679 case X86::BI__builtin_ia32_expandqi256_mask:
15680 case X86::BI__builtin_ia32_expandqi512_mask:
15681 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15682
15683 case X86::BI__builtin_ia32_compressdf128_mask:
15684 case X86::BI__builtin_ia32_compressdf256_mask:
15685 case X86::BI__builtin_ia32_compressdf512_mask:
15686 case X86::BI__builtin_ia32_compresssf128_mask:
15687 case X86::BI__builtin_ia32_compresssf256_mask:
15688 case X86::BI__builtin_ia32_compresssf512_mask:
15689 case X86::BI__builtin_ia32_compressdi128_mask:
15690 case X86::BI__builtin_ia32_compressdi256_mask:
15691 case X86::BI__builtin_ia32_compressdi512_mask:
15692 case X86::BI__builtin_ia32_compresssi128_mask:
15693 case X86::BI__builtin_ia32_compresssi256_mask:
15694 case X86::BI__builtin_ia32_compresssi512_mask:
15695 case X86::BI__builtin_ia32_compresshi128_mask:
15696 case X86::BI__builtin_ia32_compresshi256_mask:
15697 case X86::BI__builtin_ia32_compresshi512_mask:
15698 case X86::BI__builtin_ia32_compressqi128_mask:
15699 case X86::BI__builtin_ia32_compressqi256_mask:
15700 case X86::BI__builtin_ia32_compressqi512_mask:
15701 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15702
15703 case X86::BI__builtin_ia32_gather3div2df:
15704 case X86::BI__builtin_ia32_gather3div2di:
15705 case X86::BI__builtin_ia32_gather3div4df:
15706 case X86::BI__builtin_ia32_gather3div4di:
15707 case X86::BI__builtin_ia32_gather3div4sf:
15708 case X86::BI__builtin_ia32_gather3div4si:
15709 case X86::BI__builtin_ia32_gather3div8sf:
15710 case X86::BI__builtin_ia32_gather3div8si:
15711 case X86::BI__builtin_ia32_gather3siv2df:
15712 case X86::BI__builtin_ia32_gather3siv2di:
15713 case X86::BI__builtin_ia32_gather3siv4df:
15714 case X86::BI__builtin_ia32_gather3siv4di:
15715 case X86::BI__builtin_ia32_gather3siv4sf:
15716 case X86::BI__builtin_ia32_gather3siv4si:
15717 case X86::BI__builtin_ia32_gather3siv8sf:
15718 case X86::BI__builtin_ia32_gather3siv8si:
15719 case X86::BI__builtin_ia32_gathersiv8df:
15720 case X86::BI__builtin_ia32_gathersiv16sf:
15721 case X86::BI__builtin_ia32_gatherdiv8df:
15722 case X86::BI__builtin_ia32_gatherdiv16sf:
15723 case X86::BI__builtin_ia32_gathersiv8di:
15724 case X86::BI__builtin_ia32_gathersiv16si:
15725 case X86::BI__builtin_ia32_gatherdiv8di:
15726 case X86::BI__builtin_ia32_gatherdiv16si: {
15727 Intrinsic::ID IID;
15728 switch (BuiltinID) {
15729 default: llvm_unreachable("Unexpected builtin");
15730 case X86::BI__builtin_ia32_gather3div2df:
15731 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15732 break;
15733 case X86::BI__builtin_ia32_gather3div2di:
15734 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15735 break;
15736 case X86::BI__builtin_ia32_gather3div4df:
15737 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15738 break;
15739 case X86::BI__builtin_ia32_gather3div4di:
15740 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15741 break;
15742 case X86::BI__builtin_ia32_gather3div4sf:
15743 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15744 break;
15745 case X86::BI__builtin_ia32_gather3div4si:
15746 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15747 break;
15748 case X86::BI__builtin_ia32_gather3div8sf:
15749 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15750 break;
15751 case X86::BI__builtin_ia32_gather3div8si:
15752 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15753 break;
15754 case X86::BI__builtin_ia32_gather3siv2df:
15755 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15756 break;
15757 case X86::BI__builtin_ia32_gather3siv2di:
15758 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15759 break;
15760 case X86::BI__builtin_ia32_gather3siv4df:
15761 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15762 break;
15763 case X86::BI__builtin_ia32_gather3siv4di:
15764 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15765 break;
15766 case X86::BI__builtin_ia32_gather3siv4sf:
15767 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15768 break;
15769 case X86::BI__builtin_ia32_gather3siv4si:
15770 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15771 break;
15772 case X86::BI__builtin_ia32_gather3siv8sf:
15773 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15774 break;
15775 case X86::BI__builtin_ia32_gather3siv8si:
15776 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15777 break;
15778 case X86::BI__builtin_ia32_gathersiv8df:
15779 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15780 break;
15781 case X86::BI__builtin_ia32_gathersiv16sf:
15782 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15783 break;
15784 case X86::BI__builtin_ia32_gatherdiv8df:
15785 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15786 break;
15787 case X86::BI__builtin_ia32_gatherdiv16sf:
15788 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15789 break;
15790 case X86::BI__builtin_ia32_gathersiv8di:
15791 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15792 break;
15793 case X86::BI__builtin_ia32_gathersiv16si:
15794 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15795 break;
15796 case X86::BI__builtin_ia32_gatherdiv8di:
15797 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15798 break;
15799 case X86::BI__builtin_ia32_gatherdiv16si:
15800 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15801 break;
15802 }
15803
15804 unsigned MinElts = std::min(
15805 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15806 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15807 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15808 Function *Intr = CGM.getIntrinsic(IID);
15809 return Builder.CreateCall(Intr, Ops);
15810 }
15811
15812 case X86::BI__builtin_ia32_scattersiv8df:
15813 case X86::BI__builtin_ia32_scattersiv16sf:
15814 case X86::BI__builtin_ia32_scatterdiv8df:
15815 case X86::BI__builtin_ia32_scatterdiv16sf:
15816 case X86::BI__builtin_ia32_scattersiv8di:
15817 case X86::BI__builtin_ia32_scattersiv16si:
15818 case X86::BI__builtin_ia32_scatterdiv8di:
15819 case X86::BI__builtin_ia32_scatterdiv16si:
15820 case X86::BI__builtin_ia32_scatterdiv2df:
15821 case X86::BI__builtin_ia32_scatterdiv2di:
15822 case X86::BI__builtin_ia32_scatterdiv4df:
15823 case X86::BI__builtin_ia32_scatterdiv4di:
15824 case X86::BI__builtin_ia32_scatterdiv4sf:
15825 case X86::BI__builtin_ia32_scatterdiv4si:
15826 case X86::BI__builtin_ia32_scatterdiv8sf:
15827 case X86::BI__builtin_ia32_scatterdiv8si:
15828 case X86::BI__builtin_ia32_scattersiv2df:
15829 case X86::BI__builtin_ia32_scattersiv2di:
15830 case X86::BI__builtin_ia32_scattersiv4df:
15831 case X86::BI__builtin_ia32_scattersiv4di:
15832 case X86::BI__builtin_ia32_scattersiv4sf:
15833 case X86::BI__builtin_ia32_scattersiv4si:
15834 case X86::BI__builtin_ia32_scattersiv8sf:
15835 case X86::BI__builtin_ia32_scattersiv8si: {
15836 Intrinsic::ID IID;
15837 switch (BuiltinID) {
15838 default: llvm_unreachable("Unexpected builtin");
15839 case X86::BI__builtin_ia32_scattersiv8df:
15840 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15841 break;
15842 case X86::BI__builtin_ia32_scattersiv16sf:
15843 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15844 break;
15845 case X86::BI__builtin_ia32_scatterdiv8df:
15846 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15847 break;
15848 case X86::BI__builtin_ia32_scatterdiv16sf:
15849 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15850 break;
15851 case X86::BI__builtin_ia32_scattersiv8di:
15852 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15853 break;
15854 case X86::BI__builtin_ia32_scattersiv16si:
15855 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15856 break;
15857 case X86::BI__builtin_ia32_scatterdiv8di:
15858 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15859 break;
15860 case X86::BI__builtin_ia32_scatterdiv16si:
15861 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15862 break;
15863 case X86::BI__builtin_ia32_scatterdiv2df:
15864 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15865 break;
15866 case X86::BI__builtin_ia32_scatterdiv2di:
15867 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15868 break;
15869 case X86::BI__builtin_ia32_scatterdiv4df:
15870 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15871 break;
15872 case X86::BI__builtin_ia32_scatterdiv4di:
15873 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15874 break;
15875 case X86::BI__builtin_ia32_scatterdiv4sf:
15876 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15877 break;
15878 case X86::BI__builtin_ia32_scatterdiv4si:
15879 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15880 break;
15881 case X86::BI__builtin_ia32_scatterdiv8sf:
15882 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15883 break;
15884 case X86::BI__builtin_ia32_scatterdiv8si:
15885 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15886 break;
15887 case X86::BI__builtin_ia32_scattersiv2df:
15888 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15889 break;
15890 case X86::BI__builtin_ia32_scattersiv2di:
15891 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15892 break;
15893 case X86::BI__builtin_ia32_scattersiv4df:
15894 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15895 break;
15896 case X86::BI__builtin_ia32_scattersiv4di:
15897 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15898 break;
15899 case X86::BI__builtin_ia32_scattersiv4sf:
15900 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15901 break;
15902 case X86::BI__builtin_ia32_scattersiv4si:
15903 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15904 break;
15905 case X86::BI__builtin_ia32_scattersiv8sf:
15906 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15907 break;
15908 case X86::BI__builtin_ia32_scattersiv8si:
15909 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15910 break;
15911 }
15912
15913 unsigned MinElts = std::min(
15914 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15915 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15916 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15917 Function *Intr = CGM.getIntrinsic(IID);
15918 return Builder.CreateCall(Intr, Ops);
15919 }
15920
15921 case X86::BI__builtin_ia32_vextractf128_pd256:
15922 case X86::BI__builtin_ia32_vextractf128_ps256:
15923 case X86::BI__builtin_ia32_vextractf128_si256:
15924 case X86::BI__builtin_ia32_extract128i256:
15925 case X86::BI__builtin_ia32_extractf64x4_mask:
15926 case X86::BI__builtin_ia32_extractf32x4_mask:
15927 case X86::BI__builtin_ia32_extracti64x4_mask:
15928 case X86::BI__builtin_ia32_extracti32x4_mask:
15929 case X86::BI__builtin_ia32_extractf32x8_mask:
15930 case X86::BI__builtin_ia32_extracti32x8_mask:
15931 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15932 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15933 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15934 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15935 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15936 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15937 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15938 unsigned NumElts = DstTy->getNumElements();
15939 unsigned SrcNumElts =
15940 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15941 unsigned SubVectors = SrcNumElts / NumElts;
15942 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15943 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15944 Index &= SubVectors - 1; // Remove any extra bits.
15945 Index *= NumElts;
15946
15947 int Indices[16];
15948 for (unsigned i = 0; i != NumElts; ++i)
15949 Indices[i] = i + Index;
15950
15951 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15952 "extract");
15953
15954 if (Ops.size() == 4)
15955 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15956
15957 return Res;
15958 }
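  // Worked example (types assumed): extracting a 128-bit <4 x float> subvector
  // from a 512-bit <16 x float> source gives SubVectors = 4; an immediate of 2
  // is masked to 2 and scaled by NumElts, so Indices = {8, 9, 10, 11}.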
15959 case X86::BI__builtin_ia32_vinsertf128_pd256:
15960 case X86::BI__builtin_ia32_vinsertf128_ps256:
15961 case X86::BI__builtin_ia32_vinsertf128_si256:
15962 case X86::BI__builtin_ia32_insert128i256:
15963 case X86::BI__builtin_ia32_insertf64x4:
15964 case X86::BI__builtin_ia32_insertf32x4:
15965 case X86::BI__builtin_ia32_inserti64x4:
15966 case X86::BI__builtin_ia32_inserti32x4:
15967 case X86::BI__builtin_ia32_insertf32x8:
15968 case X86::BI__builtin_ia32_inserti32x8:
15969 case X86::BI__builtin_ia32_insertf32x4_256:
15970 case X86::BI__builtin_ia32_inserti32x4_256:
15971 case X86::BI__builtin_ia32_insertf64x2_256:
15972 case X86::BI__builtin_ia32_inserti64x2_256:
15973 case X86::BI__builtin_ia32_insertf64x2_512:
15974 case X86::BI__builtin_ia32_inserti64x2_512: {
15975 unsigned DstNumElts =
15976 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15977 unsigned SrcNumElts =
15978 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15979 unsigned SubVectors = DstNumElts / SrcNumElts;
15980 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15981 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15982 Index &= SubVectors - 1; // Remove any extra bits.
15983 Index *= SrcNumElts;
15984
15985 int Indices[16];
15986 for (unsigned i = 0; i != DstNumElts; ++i)
15987 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15988
15989 Value *Op1 = Builder.CreateShuffleVector(
15990 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15991
15992 for (unsigned i = 0; i != DstNumElts; ++i) {
15993 if (i >= Index && i < (Index + SrcNumElts))
15994 Indices[i] = (i - Index) + DstNumElts;
15995 else
15996 Indices[i] = i;
15997 }
15998
15999 return Builder.CreateShuffleVector(Ops[0], Op1,
16000 ArrayRef(Indices, DstNumElts), "insert");
16001 }
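  // Worked example (types assumed): inserting a <4 x i32> source into an
  // <8 x i32> destination at immediate 1 widens Ops[1] to 8 elements first,
  // then uses Indices = {0, 1, 2, 3, 8, 9, 10, 11}, i.e. the low half comes
  // from Ops[0] and the high half from the widened source.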
16002 case X86::BI__builtin_ia32_pmovqd512_mask:
16003 case X86::BI__builtin_ia32_pmovwb512_mask: {
16004 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
16005 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16006 }
16007 case X86::BI__builtin_ia32_pmovdb512_mask:
16008 case X86::BI__builtin_ia32_pmovdw512_mask:
16009 case X86::BI__builtin_ia32_pmovqw512_mask: {
16010 if (const auto *C = dyn_cast<Constant>(Ops[2]))
16011 if (C->isAllOnesValue())
16012 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
16013
16014 Intrinsic::ID IID;
16015 switch (BuiltinID) {
16016 default: llvm_unreachable("Unsupported intrinsic!");
16017 case X86::BI__builtin_ia32_pmovdb512_mask:
16018 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
16019 break;
16020 case X86::BI__builtin_ia32_pmovdw512_mask:
16021 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
16022 break;
16023 case X86::BI__builtin_ia32_pmovqw512_mask:
16024 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
16025 break;
16026 }
16027
16028 Function *Intr = CGM.getIntrinsic(IID);
16029 return Builder.CreateCall(Intr, Ops);
16030 }
16031 case X86::BI__builtin_ia32_pblendw128:
16032 case X86::BI__builtin_ia32_blendpd:
16033 case X86::BI__builtin_ia32_blendps:
16034 case X86::BI__builtin_ia32_blendpd256:
16035 case X86::BI__builtin_ia32_blendps256:
16036 case X86::BI__builtin_ia32_pblendw256:
16037 case X86::BI__builtin_ia32_pblendd128:
16038 case X86::BI__builtin_ia32_pblendd256: {
16039 unsigned NumElts =
16040 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16041 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16042
16043 int Indices[16];
16044 // If there are more than 8 elements, the immediate is used twice so make
16045 // sure we handle that.
16046 for (unsigned i = 0; i != NumElts; ++i)
16047 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
16048
16049 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16050 ArrayRef(Indices, NumElts), "blend");
16051 }
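  // Worked example (immediate assumed): with 4 elements and Imm = 0b0110 the
  // loop above yields Indices = {0, 5, 6, 3}, so elements 1 and 2 are taken
  // from Ops[1] and the rest from Ops[0].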
16052 case X86::BI__builtin_ia32_pshuflw:
16053 case X86::BI__builtin_ia32_pshuflw256:
16054 case X86::BI__builtin_ia32_pshuflw512: {
16055 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16056 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16057 unsigned NumElts = Ty->getNumElements();
16058
16059 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
16060 Imm = (Imm & 0xff) * 0x01010101;
16061
16062 int Indices[32];
16063 for (unsigned l = 0; l != NumElts; l += 8) {
16064 for (unsigned i = 0; i != 4; ++i) {
16065 Indices[l + i] = l + (Imm & 3);
16066 Imm >>= 2;
16067 }
16068 for (unsigned i = 4; i != 8; ++i)
16069 Indices[l + i] = l + i;
16070 }
16071
16072 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16073 "pshuflw");
16074 }
16075 case X86::BI__builtin_ia32_pshufhw:
16076 case X86::BI__builtin_ia32_pshufhw256:
16077 case X86::BI__builtin_ia32_pshufhw512: {
16078 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16079 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16080 unsigned NumElts = Ty->getNumElements();
16081
16082 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
16083 Imm = (Imm & 0xff) * 0x01010101;
16084
16085 int Indices[32];
16086 for (unsigned l = 0; l != NumElts; l += 8) {
16087 for (unsigned i = 0; i != 4; ++i)
16088 Indices[l + i] = l + i;
16089 for (unsigned i = 4; i != 8; ++i) {
16090 Indices[l + i] = l + 4 + (Imm & 3);
16091 Imm >>= 2;
16092 }
16093 }
16094
16095 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16096 "pshufhw");
16097 }
16098 case X86::BI__builtin_ia32_pshufd:
16099 case X86::BI__builtin_ia32_pshufd256:
16100 case X86::BI__builtin_ia32_pshufd512:
16101 case X86::BI__builtin_ia32_vpermilpd:
16102 case X86::BI__builtin_ia32_vpermilps:
16103 case X86::BI__builtin_ia32_vpermilpd256:
16104 case X86::BI__builtin_ia32_vpermilps256:
16105 case X86::BI__builtin_ia32_vpermilpd512:
16106 case X86::BI__builtin_ia32_vpermilps512: {
16107 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16108 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16109 unsigned NumElts = Ty->getNumElements();
16110 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16111 unsigned NumLaneElts = NumElts / NumLanes;
16112
16113 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
16114 Imm = (Imm & 0xff) * 0x01010101;
16115
16116 int Indices[16];
16117 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16118 for (unsigned i = 0; i != NumLaneElts; ++i) {
16119 Indices[i + l] = (Imm % NumLaneElts) + l;
16120 Imm /= NumLaneElts;
16121 }
16122 }
16123
16124 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16125 "permil");
16126 }
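  // Worked example (immediate assumed): pshufd with Imm = 0x1B on a <4 x i32>
  // vector consumes two bits per element and produces Indices = {3, 2, 1, 0},
  // a full reversal within the single 128-bit lane.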
16127 case X86::BI__builtin_ia32_shufpd:
16128 case X86::BI__builtin_ia32_shufpd256:
16129 case X86::BI__builtin_ia32_shufpd512:
16130 case X86::BI__builtin_ia32_shufps:
16131 case X86::BI__builtin_ia32_shufps256:
16132 case X86::BI__builtin_ia32_shufps512: {
16133 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16134 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16135 unsigned NumElts = Ty->getNumElements();
16136 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16137 unsigned NumLaneElts = NumElts / NumLanes;
16138
16139 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
16140 Imm = (Imm & 0xff) * 0x01010101;
16141
16142 int Indices[16];
16143 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16144 for (unsigned i = 0; i != NumLaneElts; ++i) {
16145 unsigned Index = Imm % NumLaneElts;
16146 Imm /= NumLaneElts;
16147 if (i >= (NumLaneElts / 2))
16148 Index += NumElts;
16149 Indices[l + i] = l + Index;
16150 }
16151 }
16152
16153 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16154 ArrayRef(Indices, NumElts), "shufp");
16155 }
16156 case X86::BI__builtin_ia32_permdi256:
16157 case X86::BI__builtin_ia32_permdf256:
16158 case X86::BI__builtin_ia32_permdi512:
16159 case X86::BI__builtin_ia32_permdf512: {
16160 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16161 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16162 unsigned NumElts = Ty->getNumElements();
16163
16164 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16165 int Indices[8];
16166 for (unsigned l = 0; l != NumElts; l += 4)
16167 for (unsigned i = 0; i != 4; ++i)
16168 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
16169
16170 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16171 "perm");
16172 }
16173 case X86::BI__builtin_ia32_palignr128:
16174 case X86::BI__builtin_ia32_palignr256:
16175 case X86::BI__builtin_ia32_palignr512: {
16176 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16177
16178 unsigned NumElts =
16179 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16180 assert(NumElts % 16 == 0);
16181
16182 // If palignr is shifting the pair of vectors more than the size of two
16183 // lanes, emit zero.
16184 if (ShiftVal >= 32)
16185 return llvm::Constant::getNullValue(ConvertType(E->getType()));
16186
16187 // If palignr is shifting the pair of input vectors more than one lane,
16188 // but less than two lanes, convert to shifting in zeroes.
16189 if (ShiftVal > 16) {
16190 ShiftVal -= 16;
16191 Ops[1] = Ops[0];
16192 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16193 }
16194
16195 int Indices[64];
16196 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16197 for (unsigned l = 0; l != NumElts; l += 16) {
16198 for (unsigned i = 0; i != 16; ++i) {
16199 unsigned Idx = ShiftVal + i;
16200 if (Idx >= 16)
16201 Idx += NumElts - 16; // End of lane, switch operand.
16202 Indices[l + i] = Idx + l;
16203 }
16204 }
16205
16206 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16207 ArrayRef(Indices, NumElts), "palignr");
16208 }
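  // Worked example (shift amount assumed): a 128-bit palignr with ShiftVal = 4
  // produces Indices = {4, ..., 19}: bytes 4..15 of Ops[1] followed by bytes
  // 0..3 of Ops[0] (note the swapped operand order in the shuffle above).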
16209 case X86::BI__builtin_ia32_alignd128:
16210 case X86::BI__builtin_ia32_alignd256:
16211 case X86::BI__builtin_ia32_alignd512:
16212 case X86::BI__builtin_ia32_alignq128:
16213 case X86::BI__builtin_ia32_alignq256:
16214 case X86::BI__builtin_ia32_alignq512: {
16215 unsigned NumElts =
16216 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16217 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16218
16219 // Mask the shift amount to width of a vector.
16220 ShiftVal &= NumElts - 1;
16221
16222 int Indices[16];
16223 for (unsigned i = 0; i != NumElts; ++i)
16224 Indices[i] = i + ShiftVal;
16225
16226 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16227 ArrayRef(Indices, NumElts), "valign");
16228 }
16229 case X86::BI__builtin_ia32_shuf_f32x4_256:
16230 case X86::BI__builtin_ia32_shuf_f64x2_256:
16231 case X86::BI__builtin_ia32_shuf_i32x4_256:
16232 case X86::BI__builtin_ia32_shuf_i64x2_256:
16233 case X86::BI__builtin_ia32_shuf_f32x4:
16234 case X86::BI__builtin_ia32_shuf_f64x2:
16235 case X86::BI__builtin_ia32_shuf_i32x4:
16236 case X86::BI__builtin_ia32_shuf_i64x2: {
16237 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16238 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16239 unsigned NumElts = Ty->getNumElements();
16240 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16241 unsigned NumLaneElts = NumElts / NumLanes;
16242
16243 int Indices[16];
16244 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16245 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16246 Imm /= NumLanes; // Discard the bits we just used.
16247 if (l >= (NumElts / 2))
16248 Index += NumElts; // Switch to other source.
16249 for (unsigned i = 0; i != NumLaneElts; ++i) {
16250 Indices[l + i] = Index + i;
16251 }
16252 }
16253
16254 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16255 ArrayRef(Indices, NumElts), "shuf");
16256 }
16257
16258 case X86::BI__builtin_ia32_vperm2f128_pd256:
16259 case X86::BI__builtin_ia32_vperm2f128_ps256:
16260 case X86::BI__builtin_ia32_vperm2f128_si256:
16261 case X86::BI__builtin_ia32_permti256: {
16262 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16263 unsigned NumElts =
16264 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16265
16266 // This takes a very simple approach since there are two lanes and a
16267 // shuffle can have 2 inputs. So we reserve the first input for the first
16268 // lane and the second input for the second lane. This may result in
16269 // duplicate sources, but this can be dealt with in the backend.
16270
16271 Value *OutOps[2];
16272 int Indices[8];
16273 for (unsigned l = 0; l != 2; ++l) {
16274 // Determine the source for this lane.
16275 if (Imm & (1 << ((l * 4) + 3)))
16276 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16277 else if (Imm & (1 << ((l * 4) + 1)))
16278 OutOps[l] = Ops[1];
16279 else
16280 OutOps[l] = Ops[0];
16281
16282 for (unsigned i = 0; i != NumElts/2; ++i) {
16283 // Start with ith element of the source for this lane.
16284 unsigned Idx = (l * NumElts) + i;
16285 // If bit 0 of the immediate half is set, switch to the high half of
16286 // the source.
16287 if (Imm & (1 << (l * 4)))
16288 Idx += NumElts/2;
16289 Indices[(l * (NumElts/2)) + i] = Idx;
16290 }
16291 }
16292
16293 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16294 ArrayRef(Indices, NumElts), "vperm");
16295 }
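  // Worked example (immediate assumed): for vperm2f128 on <4 x double> with
  // Imm = 0x31, lane 0 takes the high half of Ops[0] (bit 0 set, bit 1 clear)
  // and lane 1 takes the high half of Ops[1] (bits 4 and 5 set), giving
  // Indices = {2, 3, 6, 7} over OutOps = {Ops[0], Ops[1]}.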
16296
16297 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16298 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16299 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16300 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16301 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16302 // Builtin type is vXi64 so multiply by 8 to get bytes.
16303 unsigned NumElts = ResultType->getNumElements() * 8;
16304
16305 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16306 if (ShiftVal >= 16)
16307 return llvm::Constant::getNullValue(ResultType);
16308
16309 int Indices[64];
16310 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16311 for (unsigned l = 0; l != NumElts; l += 16) {
16312 for (unsigned i = 0; i != 16; ++i) {
16313 unsigned Idx = NumElts + i - ShiftVal;
16314 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16315 Indices[l + i] = Idx + l;
16316 }
16317 }
16318
16319 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16320 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16321 Value *Zero = llvm::Constant::getNullValue(VecTy);
16322 Value *SV = Builder.CreateShuffleVector(
16323 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16324 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16325 }
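  // Worked example (shift amount assumed): the 128-bit form with ShiftVal = 3
  // computes Idx = 16 + i - 3, so Indices = {13, ..., 28}: three zero bytes
  // from the Zero operand followed by bytes 0..12 of the source, i.e. a
  // byte-wise shift toward the most-significant end.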
16326 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16327 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16328 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16329 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16330 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16331 // Builtin type is vXi64 so multiply by 8 to get bytes.
16332 unsigned NumElts = ResultType->getNumElements() * 8;
16333
16334 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16335 if (ShiftVal >= 16)
16336 return llvm::Constant::getNullValue(ResultType);
16337
16338 int Indices[64];
16339 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16340 for (unsigned l = 0; l != NumElts; l += 16) {
16341 for (unsigned i = 0; i != 16; ++i) {
16342 unsigned Idx = i + ShiftVal;
16343 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16344 Indices[l + i] = Idx + l;
16345 }
16346 }
16347
16348 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16349 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16350 Value *Zero = llvm::Constant::getNullValue(VecTy);
16351 Value *SV = Builder.CreateShuffleVector(
16352 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16353 return Builder.CreateBitCast(SV, ResultType, "cast");
16354 }
16355 case X86::BI__builtin_ia32_kshiftliqi:
16356 case X86::BI__builtin_ia32_kshiftlihi:
16357 case X86::BI__builtin_ia32_kshiftlisi:
16358 case X86::BI__builtin_ia32_kshiftlidi: {
16359 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16360 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16361
16362 if (ShiftVal >= NumElts)
16363 return llvm::Constant::getNullValue(Ops[0]->getType());
16364
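// Model the shift on the <NumElts x i1> form of the mask: shuffling
// (Zero, In) with indices NumElts + i - ShiftVal zeroes the low ShiftVal
// bits and moves the remaining bits of In up by ShiftVal before the result
// is bitcast back to the integer mask type.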
16365 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16366
16367 int Indices[64];
16368 for (unsigned i = 0; i != NumElts; ++i)
16369 Indices[i] = NumElts + i - ShiftVal;
16370
16371 Value *Zero = llvm::Constant::getNullValue(In->getType());
16372 Value *SV = Builder.CreateShuffleVector(
16373 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16374 return Builder.CreateBitCast(SV, Ops[0]->getType());
16375 }
16376 case X86::BI__builtin_ia32_kshiftriqi:
16377 case X86::BI__builtin_ia32_kshiftrihi:
16378 case X86::BI__builtin_ia32_kshiftrisi:
16379 case X86::BI__builtin_ia32_kshiftridi: {
16380 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16381 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16382
16383 if (ShiftVal >= NumElts)
16384 return llvm::Constant::getNullValue(Ops[0]->getType());
16385
16386 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16387
16388 int Indices[64];
16389 for (unsigned i = 0; i != NumElts; ++i)
16390 Indices[i] = i + ShiftVal;
16391
16392 Value *Zero = llvm::Constant::getNullValue(In->getType());
16393 Value *SV = Builder.CreateShuffleVector(
16394 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16395 return Builder.CreateBitCast(SV, Ops[0]->getType());
16396 }
16397 case X86::BI__builtin_ia32_movnti:
16398 case X86::BI__builtin_ia32_movnti64:
16399 case X86::BI__builtin_ia32_movntsd:
16400 case X86::BI__builtin_ia32_movntss: {
16401 llvm::MDNode *Node = llvm::MDNode::get(
16402 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16403
16404 Value *Ptr = Ops[0];
16405 Value *Src = Ops[1];
16406
16407 // Extract the 0'th element of the source vector.
16408 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16409 BuiltinID == X86::BI__builtin_ia32_movntss)
16410 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16411
16412 // Unaligned nontemporal store of the scalar value.
16413 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16414 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16415 SI->setAlignment(llvm::Align(1));
16416 return SI;
16417 }
16418 // Rotate is a special case of funnel shift - 1st 2 args are the same.
16419 case X86::BI__builtin_ia32_vprotb:
16420 case X86::BI__builtin_ia32_vprotw:
16421 case X86::BI__builtin_ia32_vprotd:
16422 case X86::BI__builtin_ia32_vprotq:
16423 case X86::BI__builtin_ia32_vprotbi:
16424 case X86::BI__builtin_ia32_vprotwi:
16425 case X86::BI__builtin_ia32_vprotdi:
16426 case X86::BI__builtin_ia32_vprotqi:
16427 case X86::BI__builtin_ia32_prold128:
16428 case X86::BI__builtin_ia32_prold256:
16429 case X86::BI__builtin_ia32_prold512:
16430 case X86::BI__builtin_ia32_prolq128:
16431 case X86::BI__builtin_ia32_prolq256:
16432 case X86::BI__builtin_ia32_prolq512:
16433 case X86::BI__builtin_ia32_prolvd128:
16434 case X86::BI__builtin_ia32_prolvd256:
16435 case X86::BI__builtin_ia32_prolvd512:
16436 case X86::BI__builtin_ia32_prolvq128:
16437 case X86::BI__builtin_ia32_prolvq256:
16438 case X86::BI__builtin_ia32_prolvq512:
16439 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16440 case X86::BI__builtin_ia32_prord128:
16441 case X86::BI__builtin_ia32_prord256:
16442 case X86::BI__builtin_ia32_prord512:
16443 case X86::BI__builtin_ia32_prorq128:
16444 case X86::BI__builtin_ia32_prorq256:
16445 case X86::BI__builtin_ia32_prorq512:
16446 case X86::BI__builtin_ia32_prorvd128:
16447 case X86::BI__builtin_ia32_prorvd256:
16448 case X86::BI__builtin_ia32_prorvd512:
16449 case X86::BI__builtin_ia32_prorvq128:
16450 case X86::BI__builtin_ia32_prorvq256:
16451 case X86::BI__builtin_ia32_prorvq512:
16452 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16453 case X86::BI__builtin_ia32_selectb_128:
16454 case X86::BI__builtin_ia32_selectb_256:
16455 case X86::BI__builtin_ia32_selectb_512:
16456 case X86::BI__builtin_ia32_selectw_128:
16457 case X86::BI__builtin_ia32_selectw_256:
16458 case X86::BI__builtin_ia32_selectw_512:
16459 case X86::BI__builtin_ia32_selectd_128:
16460 case X86::BI__builtin_ia32_selectd_256:
16461 case X86::BI__builtin_ia32_selectd_512:
16462 case X86::BI__builtin_ia32_selectq_128:
16463 case X86::BI__builtin_ia32_selectq_256:
16464 case X86::BI__builtin_ia32_selectq_512:
16465 case X86::BI__builtin_ia32_selectph_128:
16466 case X86::BI__builtin_ia32_selectph_256:
16467 case X86::BI__builtin_ia32_selectph_512:
16468 case X86::BI__builtin_ia32_selectpbf_128:
16469 case X86::BI__builtin_ia32_selectpbf_256:
16470 case X86::BI__builtin_ia32_selectpbf_512:
16471 case X86::BI__builtin_ia32_selectps_128:
16472 case X86::BI__builtin_ia32_selectps_256:
16473 case X86::BI__builtin_ia32_selectps_512:
16474 case X86::BI__builtin_ia32_selectpd_128:
16475 case X86::BI__builtin_ia32_selectpd_256:
16476 case X86::BI__builtin_ia32_selectpd_512:
16477 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16478 case X86::BI__builtin_ia32_selectsh_128:
16479 case X86::BI__builtin_ia32_selectsbf_128:
16480 case X86::BI__builtin_ia32_selectss_128:
16481 case X86::BI__builtin_ia32_selectsd_128: {
16482 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16483 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16484 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16485 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16486 }
16487 case X86::BI__builtin_ia32_cmpb128_mask:
16488 case X86::BI__builtin_ia32_cmpb256_mask:
16489 case X86::BI__builtin_ia32_cmpb512_mask:
16490 case X86::BI__builtin_ia32_cmpw128_mask:
16491 case X86::BI__builtin_ia32_cmpw256_mask:
16492 case X86::BI__builtin_ia32_cmpw512_mask:
16493 case X86::BI__builtin_ia32_cmpd128_mask:
16494 case X86::BI__builtin_ia32_cmpd256_mask:
16495 case X86::BI__builtin_ia32_cmpd512_mask:
16496 case X86::BI__builtin_ia32_cmpq128_mask:
16497 case X86::BI__builtin_ia32_cmpq256_mask:
16498 case X86::BI__builtin_ia32_cmpq512_mask: {
16499 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16500 return EmitX86MaskedCompare(*this, CC, true, Ops);
16501 }
16502 case X86::BI__builtin_ia32_ucmpb128_mask:
16503 case X86::BI__builtin_ia32_ucmpb256_mask:
16504 case X86::BI__builtin_ia32_ucmpb512_mask:
16505 case X86::BI__builtin_ia32_ucmpw128_mask:
16506 case X86::BI__builtin_ia32_ucmpw256_mask:
16507 case X86::BI__builtin_ia32_ucmpw512_mask:
16508 case X86::BI__builtin_ia32_ucmpd128_mask:
16509 case X86::BI__builtin_ia32_ucmpd256_mask:
16510 case X86::BI__builtin_ia32_ucmpd512_mask:
16511 case X86::BI__builtin_ia32_ucmpq128_mask:
16512 case X86::BI__builtin_ia32_ucmpq256_mask:
16513 case X86::BI__builtin_ia32_ucmpq512_mask: {
16514 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16515 return EmitX86MaskedCompare(*this, CC, false, Ops);
16516 }
16517 case X86::BI__builtin_ia32_vpcomb:
16518 case X86::BI__builtin_ia32_vpcomw:
16519 case X86::BI__builtin_ia32_vpcomd:
16520 case X86::BI__builtin_ia32_vpcomq:
16521 return EmitX86vpcom(*this, Ops, true);
16522 case X86::BI__builtin_ia32_vpcomub:
16523 case X86::BI__builtin_ia32_vpcomuw:
16524 case X86::BI__builtin_ia32_vpcomud:
16525 case X86::BI__builtin_ia32_vpcomuq:
16526 return EmitX86vpcom(*this, Ops, false);
16527
16528 case X86::BI__builtin_ia32_kortestcqi:
16529 case X86::BI__builtin_ia32_kortestchi:
16530 case X86::BI__builtin_ia32_kortestcsi:
16531 case X86::BI__builtin_ia32_kortestcdi: {
16532 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16533 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16534 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16535 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16536 }
16537 case X86::BI__builtin_ia32_kortestzqi:
16538 case X86::BI__builtin_ia32_kortestzhi:
16539 case X86::BI__builtin_ia32_kortestzsi:
16540 case X86::BI__builtin_ia32_kortestzdi: {
16541 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16542 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16543 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16544 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16545 }
16546
16547 case X86::BI__builtin_ia32_ktestcqi:
16548 case X86::BI__builtin_ia32_ktestzqi:
16549 case X86::BI__builtin_ia32_ktestchi:
16550 case X86::BI__builtin_ia32_ktestzhi:
16551 case X86::BI__builtin_ia32_ktestcsi:
16552 case X86::BI__builtin_ia32_ktestzsi:
16553 case X86::BI__builtin_ia32_ktestcdi:
16554 case X86::BI__builtin_ia32_ktestzdi: {
16555 Intrinsic::ID IID;
16556 switch (BuiltinID) {
16557 default: llvm_unreachable("Unsupported intrinsic!");
16558 case X86::BI__builtin_ia32_ktestcqi:
16559 IID = Intrinsic::x86_avx512_ktestc_b;
16560 break;
16561 case X86::BI__builtin_ia32_ktestzqi:
16562 IID = Intrinsic::x86_avx512_ktestz_b;
16563 break;
16564 case X86::BI__builtin_ia32_ktestchi:
16565 IID = Intrinsic::x86_avx512_ktestc_w;
16566 break;
16567 case X86::BI__builtin_ia32_ktestzhi:
16568 IID = Intrinsic::x86_avx512_ktestz_w;
16569 break;
16570 case X86::BI__builtin_ia32_ktestcsi:
16571 IID = Intrinsic::x86_avx512_ktestc_d;
16572 break;
16573 case X86::BI__builtin_ia32_ktestzsi:
16574 IID = Intrinsic::x86_avx512_ktestz_d;
16575 break;
16576 case X86::BI__builtin_ia32_ktestcdi:
16577 IID = Intrinsic::x86_avx512_ktestc_q;
16578 break;
16579 case X86::BI__builtin_ia32_ktestzdi:
16580 IID = Intrinsic::x86_avx512_ktestz_q;
16581 break;
16582 }
16583
16584 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16585 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16586 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16587 Function *Intr = CGM.getIntrinsic(IID);
16588 return Builder.CreateCall(Intr, {LHS, RHS});
16589 }
16590
16591 case X86::BI__builtin_ia32_kaddqi:
16592 case X86::BI__builtin_ia32_kaddhi:
16593 case X86::BI__builtin_ia32_kaddsi:
16594 case X86::BI__builtin_ia32_kadddi: {
16595 Intrinsic::ID IID;
16596 switch (BuiltinID) {
16597 default: llvm_unreachable("Unsupported intrinsic!");
16598 case X86::BI__builtin_ia32_kaddqi:
16599 IID = Intrinsic::x86_avx512_kadd_b;
16600 break;
16601 case X86::BI__builtin_ia32_kaddhi:
16602 IID = Intrinsic::x86_avx512_kadd_w;
16603 break;
16604 case X86::BI__builtin_ia32_kaddsi:
16605 IID = Intrinsic::x86_avx512_kadd_d;
16606 break;
16607 case X86::BI__builtin_ia32_kadddi:
16608 IID = Intrinsic::x86_avx512_kadd_q;
16609 break;
16610 }
16611
16612 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16613 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16614 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16615 Function *Intr = CGM.getIntrinsic(IID);
16616 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16617 return Builder.CreateBitCast(Res, Ops[0]->getType());
16618 }
16619 case X86::BI__builtin_ia32_kandqi:
16620 case X86::BI__builtin_ia32_kandhi:
16621 case X86::BI__builtin_ia32_kandsi:
16622 case X86::BI__builtin_ia32_kanddi:
16623 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16624 case X86::BI__builtin_ia32_kandnqi:
16625 case X86::BI__builtin_ia32_kandnhi:
16626 case X86::BI__builtin_ia32_kandnsi:
16627 case X86::BI__builtin_ia32_kandndi:
16628 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16629 case X86::BI__builtin_ia32_korqi:
16630 case X86::BI__builtin_ia32_korhi:
16631 case X86::BI__builtin_ia32_korsi:
16632 case X86::BI__builtin_ia32_kordi:
16633 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16634 case X86::BI__builtin_ia32_kxnorqi:
16635 case X86::BI__builtin_ia32_kxnorhi:
16636 case X86::BI__builtin_ia32_kxnorsi:
16637 case X86::BI__builtin_ia32_kxnordi:
16638 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16639 case X86::BI__builtin_ia32_kxorqi:
16640 case X86::BI__builtin_ia32_kxorhi:
16641 case X86::BI__builtin_ia32_kxorsi:
16642 case X86::BI__builtin_ia32_kxordi:
16643 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16644 case X86::BI__builtin_ia32_knotqi:
16645 case X86::BI__builtin_ia32_knothi:
16646 case X86::BI__builtin_ia32_knotsi:
16647 case X86::BI__builtin_ia32_knotdi: {
16648 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16649 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16650 return Builder.CreateBitCast(Builder.CreateNot(Res),
16651 Ops[0]->getType());
16652 }
16653 case X86::BI__builtin_ia32_kmovb:
16654 case X86::BI__builtin_ia32_kmovw:
16655 case X86::BI__builtin_ia32_kmovd:
16656 case X86::BI__builtin_ia32_kmovq: {
16657 // Bitcast to vXi1 type and then back to integer. This gets the mask
16658 // register type into the IR, but might be optimized out depending on
16659 // what's around it.
16660 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16661 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16662 return Builder.CreateBitCast(Res, Ops[0]->getType());
16663 }
16664
16665 case X86::BI__builtin_ia32_kunpckdi:
16666 case X86::BI__builtin_ia32_kunpcksi:
16667 case X86::BI__builtin_ia32_kunpckhi: {
16668 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16669 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16670 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16671 int Indices[64];
16672 for (unsigned i = 0; i != NumElts; ++i)
16673 Indices[i] = i;
16674
16675 // First extract half of each vector. This gives better codegen than
16676 // doing it in a single shuffle.
16677 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16678 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16679 // Concat the vectors.
16680 // NOTE: Operands are swapped to match the intrinsic definition.
16681 Value *Res =
16682 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16683 return Builder.CreateBitCast(Res, Ops[0]->getType());
16684 }
16685
16686 case X86::BI__builtin_ia32_vplzcntd_128:
16687 case X86::BI__builtin_ia32_vplzcntd_256:
16688 case X86::BI__builtin_ia32_vplzcntd_512:
16689 case X86::BI__builtin_ia32_vplzcntq_128:
16690 case X86::BI__builtin_ia32_vplzcntq_256:
16691 case X86::BI__builtin_ia32_vplzcntq_512: {
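// vplzcnt maps directly onto llvm.ctlz with is_zero_poison set to false,
// so a zero input yields the element bit width, matching the instruction.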
16692 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16693 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
16694 }
16695 case X86::BI__builtin_ia32_sqrtss:
16696 case X86::BI__builtin_ia32_sqrtsd: {
16697 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16698 Function *F;
16699 if (Builder.getIsFPConstrained()) {
16700 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16701 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16702 A->getType());
16703 A = Builder.CreateConstrainedFPCall(F, {A});
16704 } else {
16705 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16706 A = Builder.CreateCall(F, {A});
16707 }
16708 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16709 }
16710 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16711 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16712 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16713 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16714 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16715 // otherwise keep the intrinsic.
16716 if (CC != 4) {
16717 Intrinsic::ID IID;
16718
16719 switch (BuiltinID) {
16720 default:
16721 llvm_unreachable("Unsupported intrinsic!");
16722 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16723 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16724 break;
16725 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16726 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16727 break;
16728 case X86::BI__builtin_ia32_sqrtss_round_mask:
16729 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16730 break;
16731 }
16732 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16733 }
16734 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16735 Function *F;
16736 if (Builder.getIsFPConstrained()) {
16737 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16738 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16739 A->getType());
16740 A = Builder.CreateConstrainedFPCall(F, A);
16741 } else {
16742 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16743 A = Builder.CreateCall(F, A);
16744 }
16745 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16746 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16747 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16748 }
16749 case X86::BI__builtin_ia32_sqrtpd256:
16750 case X86::BI__builtin_ia32_sqrtpd:
16751 case X86::BI__builtin_ia32_sqrtps256:
16752 case X86::BI__builtin_ia32_sqrtps:
16753 case X86::BI__builtin_ia32_sqrtph256:
16754 case X86::BI__builtin_ia32_sqrtph:
16755 case X86::BI__builtin_ia32_sqrtph512:
16756 case X86::BI__builtin_ia32_vsqrtbf16256:
16757 case X86::BI__builtin_ia32_vsqrtbf16:
16758 case X86::BI__builtin_ia32_vsqrtbf16512:
16759 case X86::BI__builtin_ia32_sqrtps512:
16760 case X86::BI__builtin_ia32_sqrtpd512: {
16761 if (Ops.size() == 2) {
16762 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16763 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16764 // otherwise keep the intrinsic.
16765 if (CC != 4) {
16766 Intrinsic::ID IID;
16767
16768 switch (BuiltinID) {
16769 default:
16770 llvm_unreachable("Unsupported intrinsic!");
16771 case X86::BI__builtin_ia32_sqrtph512:
16772 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16773 break;
16774 case X86::BI__builtin_ia32_sqrtps512:
16775 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16776 break;
16777 case X86::BI__builtin_ia32_sqrtpd512:
16778 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16779 break;
16780 }
16781 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16782 }
16783 }
16784 if (Builder.getIsFPConstrained()) {
16785 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16786 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16787 Ops[0]->getType());
16788 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16789 } else {
16790 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16791 return Builder.CreateCall(F, Ops[0]);
16792 }
16793 }
16794
16795 case X86::BI__builtin_ia32_pmuludq128:
16796 case X86::BI__builtin_ia32_pmuludq256:
16797 case X86::BI__builtin_ia32_pmuludq512:
16798 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16799
16800 case X86::BI__builtin_ia32_pmuldq128:
16801 case X86::BI__builtin_ia32_pmuldq256:
16802 case X86::BI__builtin_ia32_pmuldq512:
16803 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16804
16805 case X86::BI__builtin_ia32_pternlogd512_mask:
16806 case X86::BI__builtin_ia32_pternlogq512_mask:
16807 case X86::BI__builtin_ia32_pternlogd128_mask:
16808 case X86::BI__builtin_ia32_pternlogd256_mask:
16809 case X86::BI__builtin_ia32_pternlogq128_mask:
16810 case X86::BI__builtin_ia32_pternlogq256_mask:
16811 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16812
16813 case X86::BI__builtin_ia32_pternlogd512_maskz:
16814 case X86::BI__builtin_ia32_pternlogq512_maskz:
16815 case X86::BI__builtin_ia32_pternlogd128_maskz:
16816 case X86::BI__builtin_ia32_pternlogd256_maskz:
16817 case X86::BI__builtin_ia32_pternlogq128_maskz:
16818 case X86::BI__builtin_ia32_pternlogq256_maskz:
16819 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16820
16821 case X86::BI__builtin_ia32_vpshldd128:
16822 case X86::BI__builtin_ia32_vpshldd256:
16823 case X86::BI__builtin_ia32_vpshldd512:
16824 case X86::BI__builtin_ia32_vpshldq128:
16825 case X86::BI__builtin_ia32_vpshldq256:
16826 case X86::BI__builtin_ia32_vpshldq512:
16827 case X86::BI__builtin_ia32_vpshldw128:
16828 case X86::BI__builtin_ia32_vpshldw256:
16829 case X86::BI__builtin_ia32_vpshldw512:
16830 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16831
16832 case X86::BI__builtin_ia32_vpshrdd128:
16833 case X86::BI__builtin_ia32_vpshrdd256:
16834 case X86::BI__builtin_ia32_vpshrdd512:
16835 case X86::BI__builtin_ia32_vpshrdq128:
16836 case X86::BI__builtin_ia32_vpshrdq256:
16837 case X86::BI__builtin_ia32_vpshrdq512:
16838 case X86::BI__builtin_ia32_vpshrdw128:
16839 case X86::BI__builtin_ia32_vpshrdw256:
16840 case X86::BI__builtin_ia32_vpshrdw512:
16841 // Ops 0 and 1 are swapped.
16842 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16843
16844 case X86::BI__builtin_ia32_vpshldvd128:
16845 case X86::BI__builtin_ia32_vpshldvd256:
16846 case X86::BI__builtin_ia32_vpshldvd512:
16847 case X86::BI__builtin_ia32_vpshldvq128:
16848 case X86::BI__builtin_ia32_vpshldvq256:
16849 case X86::BI__builtin_ia32_vpshldvq512:
16850 case X86::BI__builtin_ia32_vpshldvw128:
16851 case X86::BI__builtin_ia32_vpshldvw256:
16852 case X86::BI__builtin_ia32_vpshldvw512:
16853 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16854
16855 case X86::BI__builtin_ia32_vpshrdvd128:
16856 case X86::BI__builtin_ia32_vpshrdvd256:
16857 case X86::BI__builtin_ia32_vpshrdvd512:
16858 case X86::BI__builtin_ia32_vpshrdvq128:
16859 case X86::BI__builtin_ia32_vpshrdvq256:
16860 case X86::BI__builtin_ia32_vpshrdvq512:
16861 case X86::BI__builtin_ia32_vpshrdvw128:
16862 case X86::BI__builtin_ia32_vpshrdvw256:
16863 case X86::BI__builtin_ia32_vpshrdvw512:
16864 // Ops 0 and 1 are swapped.
16865 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16866
16867 // Reductions
16868 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16869 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16870 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16871 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16872 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
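// llvm.vector.reduce.fadd takes the scalar start value (Ops[0]) and the
// vector (Ops[1]); allowing reassociation lets the backend emit a tree
// reduction rather than a strictly ordered one.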
16873 Function *F =
16874 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16875 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16876 Builder.getFastMathFlags().setAllowReassoc();
16877 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16878 }
16879 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16880 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16881 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16882 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16883 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16884 Function *F =
16885 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16886 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16887 Builder.getFastMathFlags().setAllowReassoc();
16888 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16889 }
16890 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16891 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16892 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16893 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16894 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16895 Function *F =
16896 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16897 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16898 Builder.getFastMathFlags().setNoNaNs();
16899 return Builder.CreateCall(F, {Ops[0]});
16900 }
16901 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16902 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16903 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16904 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16905 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16906 Function *F =
16907 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16908 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16909 Builder.getFastMathFlags().setNoNaNs();
16910 return Builder.CreateCall(F, {Ops[0]});
16911 }
16912
16913 case X86::BI__builtin_ia32_rdrand16_step:
16914 case X86::BI__builtin_ia32_rdrand32_step:
16915 case X86::BI__builtin_ia32_rdrand64_step:
16916 case X86::BI__builtin_ia32_rdseed16_step:
16917 case X86::BI__builtin_ia32_rdseed32_step:
16918 case X86::BI__builtin_ia32_rdseed64_step: {
16919 Intrinsic::ID ID;
16920 switch (BuiltinID) {
16921 default: llvm_unreachable("Unsupported intrinsic!");
16922 case X86::BI__builtin_ia32_rdrand16_step:
16923 ID = Intrinsic::x86_rdrand_16;
16924 break;
16925 case X86::BI__builtin_ia32_rdrand32_step:
16926 ID = Intrinsic::x86_rdrand_32;
16927 break;
16928 case X86::BI__builtin_ia32_rdrand64_step:
16929 ID = Intrinsic::x86_rdrand_64;
16930 break;
16931 case X86::BI__builtin_ia32_rdseed16_step:
16932 ID = Intrinsic::x86_rdseed_16;
16933 break;
16934 case X86::BI__builtin_ia32_rdseed32_step:
16935 ID = Intrinsic::x86_rdseed_32;
16936 break;
16937 case X86::BI__builtin_ia32_rdseed64_step:
16938 ID = Intrinsic::x86_rdseed_64;
16939 break;
16940 }
16941
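// The rdrand/rdseed intrinsics return { random value, i32 success flag }:
// store the value through the pointer argument and return the flag.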
16942 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16943 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16944 Ops[0]);
16945 return Builder.CreateExtractValue(Call, 1);
16946 }
16947 case X86::BI__builtin_ia32_addcarryx_u32:
16948 case X86::BI__builtin_ia32_addcarryx_u64:
16949 case X86::BI__builtin_ia32_subborrow_u32:
16950 case X86::BI__builtin_ia32_subborrow_u64: {
16951 Intrinsic::ID IID;
16952 switch (BuiltinID) {
16953 default: llvm_unreachable("Unsupported intrinsic!");
16954 case X86::BI__builtin_ia32_addcarryx_u32:
16955 IID = Intrinsic::x86_addcarry_32;
16956 break;
16957 case X86::BI__builtin_ia32_addcarryx_u64:
16958 IID = Intrinsic::x86_addcarry_64;
16959 break;
16960 case X86::BI__builtin_ia32_subborrow_u32:
16961 IID = Intrinsic::x86_subborrow_32;
16962 break;
16963 case X86::BI__builtin_ia32_subborrow_u64:
16964 IID = Intrinsic::x86_subborrow_64;
16965 break;
16966 }
16967
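// The addcarry/subborrow intrinsics return { carry/borrow out, result }:
// store the result through Ops[3] and return the new carry/borrow flag.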
16968 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16969 { Ops[0], Ops[1], Ops[2] });
16970 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16971 Ops[3]);
16972 return Builder.CreateExtractValue(Call, 0);
16973 }
16974
16975 case X86::BI__builtin_ia32_fpclassps128_mask:
16976 case X86::BI__builtin_ia32_fpclassps256_mask:
16977 case X86::BI__builtin_ia32_fpclassps512_mask:
16978 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
16979 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
16980 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
16981 case X86::BI__builtin_ia32_fpclassph128_mask:
16982 case X86::BI__builtin_ia32_fpclassph256_mask:
16983 case X86::BI__builtin_ia32_fpclassph512_mask:
16984 case X86::BI__builtin_ia32_fpclasspd128_mask:
16985 case X86::BI__builtin_ia32_fpclasspd256_mask:
16986 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16987 unsigned NumElts =
16988 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
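// Peel off the incoming mask and call the unmasked fpclass intrinsic; the
// mask is applied to the i1 results by EmitX86MaskedCompareResult below.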
16989 Value *MaskIn = Ops[2];
16990 Ops.erase(&Ops[2]);
16991
16992 Intrinsic::ID ID;
16993 switch (BuiltinID) {
16994 default: llvm_unreachable("Unsupported intrinsic!");
16995 case X86::BI__builtin_ia32_vfpclassbf16128_mask:
16996 ID = Intrinsic::x86_avx10_fpclass_bf16_128;
16997 break;
16998 case X86::BI__builtin_ia32_vfpclassbf16256_mask:
16999 ID = Intrinsic::x86_avx10_fpclass_bf16_256;
17000 break;
17001 case X86::BI__builtin_ia32_vfpclassbf16512_mask:
17002 ID = Intrinsic::x86_avx10_fpclass_bf16_512;
17003 break;
17004 case X86::BI__builtin_ia32_fpclassph128_mask:
17005 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
17006 break;
17007 case X86::BI__builtin_ia32_fpclassph256_mask:
17008 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
17009 break;
17010 case X86::BI__builtin_ia32_fpclassph512_mask:
17011 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
17012 break;
17013 case X86::BI__builtin_ia32_fpclassps128_mask:
17014 ID = Intrinsic::x86_avx512_fpclass_ps_128;
17015 break;
17016 case X86::BI__builtin_ia32_fpclassps256_mask:
17017 ID = Intrinsic::x86_avx512_fpclass_ps_256;
17018 break;
17019 case X86::BI__builtin_ia32_fpclassps512_mask:
17020 ID = Intrinsic::x86_avx512_fpclass_ps_512;
17021 break;
17022 case X86::BI__builtin_ia32_fpclasspd128_mask:
17023 ID = Intrinsic::x86_avx512_fpclass_pd_128;
17024 break;
17025 case X86::BI__builtin_ia32_fpclasspd256_mask:
17026 ID = Intrinsic::x86_avx512_fpclass_pd_256;
17027 break;
17028 case X86::BI__builtin_ia32_fpclasspd512_mask:
17029 ID = Intrinsic::x86_avx512_fpclass_pd_512;
17030 break;
17031 }
17032
17033 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17034 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
17035 }
17036
17037 case X86::BI__builtin_ia32_vp2intersect_q_512:
17038 case X86::BI__builtin_ia32_vp2intersect_q_256:
17039 case X86::BI__builtin_ia32_vp2intersect_q_128:
17040 case X86::BI__builtin_ia32_vp2intersect_d_512:
17041 case X86::BI__builtin_ia32_vp2intersect_d_256:
17042 case X86::BI__builtin_ia32_vp2intersect_d_128: {
17043 unsigned NumElts =
17044 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17045 Intrinsic::ID ID;
17046
17047 switch (BuiltinID) {
17048 default: llvm_unreachable("Unsupported intrinsic!");
17049 case X86::BI__builtin_ia32_vp2intersect_q_512:
17050 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
17051 break;
17052 case X86::BI__builtin_ia32_vp2intersect_q_256:
17053 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
17054 break;
17055 case X86::BI__builtin_ia32_vp2intersect_q_128:
17056 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
17057 break;
17058 case X86::BI__builtin_ia32_vp2intersect_d_512:
17059 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
17060 break;
17061 case X86::BI__builtin_ia32_vp2intersect_d_256:
17062 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
17063 break;
17064 case X86::BI__builtin_ia32_vp2intersect_d_128:
17065 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
17066 break;
17067 }
17068
17069 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
17070 Value *Result = Builder.CreateExtractValue(Call, 0);
17071 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
17072 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
17073
17074 Result = Builder.CreateExtractValue(Call, 1);
17075 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
17076 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
17077 }
17078
17079 case X86::BI__builtin_ia32_vpmultishiftqb128:
17080 case X86::BI__builtin_ia32_vpmultishiftqb256:
17081 case X86::BI__builtin_ia32_vpmultishiftqb512: {
17082 Intrinsic::ID ID;
17083 switch (BuiltinID) {
17084 default: llvm_unreachable("Unsupported intrinsic!");
17085 case X86::BI__builtin_ia32_vpmultishiftqb128:
17086 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
17087 break;
17088 case X86::BI__builtin_ia32_vpmultishiftqb256:
17089 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
17090 break;
17091 case X86::BI__builtin_ia32_vpmultishiftqb512:
17092 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
17093 break;
17094 }
17095
17096 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17097 }
17098
17099 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17100 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17101 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
17102 unsigned NumElts =
17103 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17104 Value *MaskIn = Ops[2];
17105 Ops.erase(&Ops[2]);
17106
17107 Intrinsic::ID ID;
17108 switch (BuiltinID) {
17109 default: llvm_unreachable("Unsupported intrinsic!");
17110 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17111 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
17112 break;
17113 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17114 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
17115 break;
17116 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
17117 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
17118 break;
17119 }
17120
17121 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17122 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
17123 }
17124
17125 // packed comparison intrinsics
17126 case X86::BI__builtin_ia32_cmpeqps:
17127 case X86::BI__builtin_ia32_cmpeqpd:
17128 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
17129 case X86::BI__builtin_ia32_cmpltps:
17130 case X86::BI__builtin_ia32_cmpltpd:
17131 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
17132 case X86::BI__builtin_ia32_cmpleps:
17133 case X86::BI__builtin_ia32_cmplepd:
17134 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
17135 case X86::BI__builtin_ia32_cmpunordps:
17136 case X86::BI__builtin_ia32_cmpunordpd:
17137 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
17138 case X86::BI__builtin_ia32_cmpneqps:
17139 case X86::BI__builtin_ia32_cmpneqpd:
17140 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
17141 case X86::BI__builtin_ia32_cmpnltps:
17142 case X86::BI__builtin_ia32_cmpnltpd:
17143 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
17144 case X86::BI__builtin_ia32_cmpnleps:
17145 case X86::BI__builtin_ia32_cmpnlepd:
17146 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
17147 case X86::BI__builtin_ia32_cmpordps:
17148 case X86::BI__builtin_ia32_cmpordpd:
17149 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
17150 case X86::BI__builtin_ia32_cmpph128_mask:
17151 case X86::BI__builtin_ia32_cmpph256_mask:
17152 case X86::BI__builtin_ia32_cmpph512_mask:
17153 case X86::BI__builtin_ia32_cmpps128_mask:
17154 case X86::BI__builtin_ia32_cmpps256_mask:
17155 case X86::BI__builtin_ia32_cmpps512_mask:
17156 case X86::BI__builtin_ia32_cmppd128_mask:
17157 case X86::BI__builtin_ia32_cmppd256_mask:
17158 case X86::BI__builtin_ia32_cmppd512_mask:
17159 case X86::BI__builtin_ia32_vcmppd256_round_mask:
17160 case X86::BI__builtin_ia32_vcmpps256_round_mask:
17161 case X86::BI__builtin_ia32_vcmpph256_round_mask:
17162 case X86::BI__builtin_ia32_vcmpbf16512_mask:
17163 case X86::BI__builtin_ia32_vcmpbf16256_mask:
17164 case X86::BI__builtin_ia32_vcmpbf16128_mask:
17165 IsMaskFCmp = true;
17166 [[fallthrough]];
17167 case X86::BI__builtin_ia32_cmpps:
17168 case X86::BI__builtin_ia32_cmpps256:
17169 case X86::BI__builtin_ia32_cmppd:
17170 case X86::BI__builtin_ia32_cmppd256: {
17171 // Lower vector comparisons to fcmp instructions, ignoring the requested
17172 // signalling behaviour and the requested rounding mode.
17173 // This is only possible if the fp-model is not strict and FENV_ACCESS is
17174 // off.
17175
17176 // The third argument is the comparison condition, an integer in the
17177 // range [0, 31].
17178 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
17179
17180 // Lowering to IR fcmp instruction.
17181 // Ignoring requested signaling behaviour,
17182 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
17183 FCmpInst::Predicate Pred;
17184 bool IsSignaling;
17185 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17186 // behavior is inverted. We'll handle that after the switch.
17187 switch (CC & 0xf) {
17188 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
17189 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
17190 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
17191 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
17192 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
17193 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
17194 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
17195 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
17196 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17197 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17198 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17199 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17200 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17201 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17202 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17203 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17204 default: llvm_unreachable("Unhandled CC");
17205 }
17206
17207 // Invert the signalling behavior for 16-31.
17208 if (CC & 0x10)
17209 IsSignaling = !IsSignaling;
17210
17211 // If the predicate is true or false and we're using constrained intrinsics,
17212 // we don't have a compare intrinsic we can use. Just use the legacy X86
17213 // specific intrinsic.
17214 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17215 // use the legacy X86 specific intrinsic.
17216 if (Builder.getIsFPConstrained() &&
17217 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17218 IsMaskFCmp)) {
17219
17220 Intrinsic::ID IID;
17221 switch (BuiltinID) {
17222 default: llvm_unreachable("Unexpected builtin");
17223 case X86::BI__builtin_ia32_cmpps:
17224 IID = Intrinsic::x86_sse_cmp_ps;
17225 break;
17226 case X86::BI__builtin_ia32_cmpps256:
17227 IID = Intrinsic::x86_avx_cmp_ps_256;
17228 break;
17229 case X86::BI__builtin_ia32_cmppd:
17230 IID = Intrinsic::x86_sse2_cmp_pd;
17231 break;
17232 case X86::BI__builtin_ia32_cmppd256:
17233 IID = Intrinsic::x86_avx_cmp_pd_256;
17234 break;
17235 case X86::BI__builtin_ia32_cmpph128_mask:
17236 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17237 break;
17238 case X86::BI__builtin_ia32_cmpph256_mask:
17239 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17240 break;
17241 case X86::BI__builtin_ia32_cmpph512_mask:
17242 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17243 break;
17244 case X86::BI__builtin_ia32_cmpps512_mask:
17245 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17246 break;
17247 case X86::BI__builtin_ia32_cmppd512_mask:
17248 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17249 break;
17250 case X86::BI__builtin_ia32_cmpps128_mask:
17251 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17252 break;
17253 case X86::BI__builtin_ia32_cmpps256_mask:
17254 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17255 break;
17256 case X86::BI__builtin_ia32_cmppd128_mask:
17257 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17258 break;
17259 case X86::BI__builtin_ia32_cmppd256_mask:
17260 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17261 break;
17262 }
17263
17264 Function *Intr = CGM.getIntrinsic(IID);
17265 if (IsMaskFCmp) {
17266 unsigned NumElts =
17267 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17268 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17269 Value *Cmp = Builder.CreateCall(Intr, Ops);
17270 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17271 }
17272
17273 return Builder.CreateCall(Intr, Ops);
17274 }
17275
17276 // Builtins without the _mask suffix return a vector of integers
17277 // of the same width as the input vectors
17278 if (IsMaskFCmp) {
17279 // We ignore SAE if strict FP is disabled. We only keep precise
17280 // exception behavior under strict FP.
17281 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
17282 // object will be required.
17283 unsigned NumElts =
17284 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17285 Value *Cmp;
17286 if (IsSignaling)
17287 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17288 else
17289 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17290 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17291 }
17292
17293 return getVectorFCmpIR(Pred, IsSignaling);
17294 }
17295
17296 // SSE scalar comparison intrinsics
17297 case X86::BI__builtin_ia32_cmpeqss:
17298 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17299 case X86::BI__builtin_ia32_cmpltss:
17300 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17301 case X86::BI__builtin_ia32_cmpless:
17302 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17303 case X86::BI__builtin_ia32_cmpunordss:
17304 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17305 case X86::BI__builtin_ia32_cmpneqss:
17306 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17307 case X86::BI__builtin_ia32_cmpnltss:
17308 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17309 case X86::BI__builtin_ia32_cmpnless:
17310 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17311 case X86::BI__builtin_ia32_cmpordss:
17312 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17313 case X86::BI__builtin_ia32_cmpeqsd:
17314 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17315 case X86::BI__builtin_ia32_cmpltsd:
17316 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17317 case X86::BI__builtin_ia32_cmplesd:
17318 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17319 case X86::BI__builtin_ia32_cmpunordsd:
17320 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17321 case X86::BI__builtin_ia32_cmpneqsd:
17322 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17323 case X86::BI__builtin_ia32_cmpnltsd:
17324 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17325 case X86::BI__builtin_ia32_cmpnlesd:
17326 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17327 case X86::BI__builtin_ia32_cmpordsd:
17328 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17329
17330 // f16c half2float intrinsics
17331 case X86::BI__builtin_ia32_vcvtph2ps:
17332 case X86::BI__builtin_ia32_vcvtph2ps256:
17333 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17334 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17335 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17336 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17337 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17338 }
17339
17340 // AVX512 bf16 intrinsics
17341 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17342 Ops[2] = getMaskVecValue(
17343 *this, Ops[2],
17344 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17345 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17346 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17347 }
17348 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17349 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17350
17351 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17352 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17353 Intrinsic::ID IID;
17354 switch (BuiltinID) {
17355 default: llvm_unreachable("Unsupported intrinsic!");
17356 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17357 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17358 break;
17359 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17360 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17361 break;
17362 }
17363 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17364 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17365 }
17366
17367 case X86::BI__cpuid:
17368 case X86::BI__cpuidex: {
17369 Value *FuncId = EmitScalarExpr(E->getArg(1));
17370 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17371 ? EmitScalarExpr(E->getArg(2))
17372 : llvm::ConstantInt::get(Int32Ty, 0);
17373
17374 llvm::StructType *CpuidRetTy =
17375 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17376 llvm::FunctionType *FTy =
17377 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17378
17379 StringRef Asm, Constraints;
17380 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17381 Asm = "cpuid";
17382 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17383 } else {
17384 // x86-64 uses %rbx as the base register, so preserve it.
17385 Asm = "xchgq %rbx, ${1:q}\n"
17386 "cpuid\n"
17387 "xchgq %rbx, ${1:q}";
17388 Constraints = "={ax},=r,={cx},={dx},0,2";
17389 }
17390
17391 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17392 /*hasSideEffects=*/false);
17393 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17394 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17395 Value *Store = nullptr;
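// Scatter the four returned registers (EAX, EBX, ECX, EDX) into
// consecutive i32 slots of the buffer passed as the first argument.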
17396 for (unsigned i = 0; i < 4; i++) {
17397 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17398 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17399 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17400 }
17401
17402 // Return the last store instruction to signal that we have emitted the
17403 // intrinsic.
17404 return Store;
17405 }
17406
17407 case X86::BI__emul:
17408 case X86::BI__emulu: {
17409 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17410 bool isSigned = (BuiltinID == X86::BI__emul);
17411 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17412 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17413 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17414 }
17415 case X86::BI__mulh:
17416 case X86::BI__umulh:
17417 case X86::BI_mul128:
17418 case X86::BI_umul128: {
17419 llvm::Type *ResType = ConvertType(E->getType());
17420 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17421
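// Widen both operands to i128, multiply, and take the high 64 bits of the
// product; __mulh/__umulh return the high half, while _mul128/_umul128
// also store it through the third argument and return the low half.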
17422 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17423 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17424 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17425
17426 Value *MulResult, *HigherBits;
17427 if (IsSigned) {
17428 MulResult = Builder.CreateNSWMul(LHS, RHS);
17429 HigherBits = Builder.CreateAShr(MulResult, 64);
17430 } else {
17431 MulResult = Builder.CreateNUWMul(LHS, RHS);
17432 HigherBits = Builder.CreateLShr(MulResult, 64);
17433 }
17434 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17435
17436 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17437 return HigherBits;
17438
17439 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17440 Builder.CreateStore(HigherBits, HighBitsAddress);
17441 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17442 }
17443
17444 case X86::BI__faststorefence: {
17445 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17446 llvm::SyncScope::System);
17447 }
17448 case X86::BI__shiftleft128:
17449 case X86::BI__shiftright128: {
17450 llvm::Function *F = CGM.getIntrinsic(
17451 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17452 Int64Ty);
17453 // Flip low/high ops and zero-extend amount to matching type.
17454 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17455 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17456 std::swap(Ops[0], Ops[1]);
17457 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17458 return Builder.CreateCall(F, Ops);
17459 }
17460 case X86::BI_ReadWriteBarrier:
17461 case X86::BI_ReadBarrier:
17462 case X86::BI_WriteBarrier: {
17463 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17464 llvm::SyncScope::SingleThread);
17465 }
17466
17467 case X86::BI_AddressOfReturnAddress: {
17468 Function *F =
17469 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17470 return Builder.CreateCall(F);
17471 }
17472 case X86::BI__stosb: {
17473 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
17474 // instruction, but it will create a memset that won't be optimized away.
17475 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17476 }
17477 // Corresponds to intrinsics that return 2 tiles (tile0_tile1).
17478 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17479 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17480 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17481 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17482 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17483 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17484 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17485 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17486 Intrinsic::ID IID;
17487 switch (BuiltinID) {
17488 default:
17489 llvm_unreachable("Unsupported intrinsic!");
17490 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17491 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17492 break;
17493 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17494 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17495 break;
17496 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17497 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17498 break;
17499 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17500 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17501 break;
17502 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17503 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17504 break;
17505 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17506 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17507 break;
17508 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17509 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17510 break;
17511 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17512 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17513 break;
17514 }
17515
17516 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17517 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17518 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17519
17520 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17521 assert(PtrTy && "arg3 must be of pointer type");
17522 QualType PtreeTy = PtrTy->getPointeeType();
17523 llvm::Type *TyPtee = ConvertType(PtreeTy);
17524
17525 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17526 // Then store tile0 into DstPtr0
17527 Value *T0 = Builder.CreateExtractValue(Call, 0);
17528 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17529 {TyPtee}, {T0});
17530 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17531
17532 // Then store tile1 into DstPtr1
17533 Value *T1 = Builder.CreateExtractValue(Call, 1);
17534 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17535 {TyPtee}, {T1});
17536 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17537
17538 // Note: we deliberately avoid using x86_tilestored64_internal to store the
17539 // results here, because it cannot guarantee the scope of the memory being
17540 // written. That could force shape reloads after the first AMX intrinsic,
17541 // which the current AMX register allocation is unable to handle.
17542
17543 return Store;
17544 }
17545 case X86::BI__ud2:
17546 // llvm.trap makes a ud2a instruction on x86.
17547 return EmitTrapCall(Intrinsic::trap);
17548 case X86::BI__int2c: {
17549 // This syscall signals a driver assertion failure in x86 NT kernels.
17550 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17551 llvm::InlineAsm *IA =
17552 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17553 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17554 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17555 llvm::Attribute::NoReturn);
17556 llvm::CallInst *CI = Builder.CreateCall(IA);
17557 CI->setAttributes(NoReturnAttr);
17558 return CI;
17559 }
17560 case X86::BI__readfsbyte:
17561 case X86::BI__readfsword:
17562 case X86::BI__readfsdword:
17563 case X86::BI__readfsqword: {
17564 llvm::Type *IntTy = ConvertType(E->getType());
17565 Value *Ptr = Builder.CreateIntToPtr(
17566 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17567 LoadInst *Load = Builder.CreateAlignedLoad(
17568 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17569 Load->setVolatile(true);
17570 return Load;
17571 }
17572 case X86::BI__readgsbyte:
17573 case X86::BI__readgsword:
17574 case X86::BI__readgsdword:
17575 case X86::BI__readgsqword: {
17576 llvm::Type *IntTy = ConvertType(E->getType());
17577 Value *Ptr = Builder.CreateIntToPtr(
17578 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17579 LoadInst *Load = Builder.CreateAlignedLoad(
17580 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17581 Load->setVolatile(true);
17582 return Load;
17583 }
17584 case X86::BI__builtin_ia32_encodekey128_u32: {
17585 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17586
17587 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17588
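// The result struct carries the i32 return value in element 0 and the key
// handle as three 128-bit chunks in elements 1..3; store the chunks at
// 16-byte offsets into the handle buffer (Ops[2]).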
17589 for (int i = 0; i < 3; ++i) {
17590 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17591 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17592 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17593 }
17594
17595 return Builder.CreateExtractValue(Call, 0);
17596 }
17597 case X86::BI__builtin_ia32_encodekey256_u32: {
17598 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17599
17600 Value *Call =
17601 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17602
17603 for (int i = 0; i < 4; ++i) {
17604 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17605 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17606 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17607 }
17608
17609 return Builder.CreateExtractValue(Call, 0);
17610 }
17611 case X86::BI__builtin_ia32_aesenc128kl_u8:
17612 case X86::BI__builtin_ia32_aesdec128kl_u8:
17613 case X86::BI__builtin_ia32_aesenc256kl_u8:
17614 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17615 Intrinsic::ID IID;
17616 StringRef BlockName;
17617 switch (BuiltinID) {
17618 default:
17619 llvm_unreachable("Unexpected builtin");
17620 case X86::BI__builtin_ia32_aesenc128kl_u8:
17621 IID = Intrinsic::x86_aesenc128kl;
17622 BlockName = "aesenc128kl";
17623 break;
17624 case X86::BI__builtin_ia32_aesdec128kl_u8:
17625 IID = Intrinsic::x86_aesdec128kl;
17626 BlockName = "aesdec128kl";
17627 break;
17628 case X86::BI__builtin_ia32_aesenc256kl_u8:
17629 IID = Intrinsic::x86_aesenc256kl;
17630 BlockName = "aesenc256kl";
17631 break;
17632 case X86::BI__builtin_ia32_aesdec256kl_u8:
17633 IID = Intrinsic::x86_aesdec256kl;
17634 BlockName = "aesdec256kl";
17635 break;
17636 }
17637
17638 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17639
17640 BasicBlock *NoError =
17641 createBasicBlock(BlockName + "_no_error", this->CurFn);
17642 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17643 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17644
17645 Value *Ret = Builder.CreateExtractValue(Call, 0);
17646 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17647 Value *Out = Builder.CreateExtractValue(Call, 1);
17648 Builder.CreateCondBr(Succ, NoError, Error);
17649
17650 Builder.SetInsertPoint(NoError);
17651 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17652 Builder.CreateBr(End);
17653
17654 Builder.SetInsertPoint(Error);
17655 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17656 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17657 Builder.CreateBr(End);
17658
17659 Builder.SetInsertPoint(End);
17660 return Builder.CreateExtractValue(Call, 0);
17661 }
17662 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17663 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17664 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17665 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17666 Intrinsic::ID IID;
17667 StringRef BlockName;
17668 switch (BuiltinID) {
17669 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17670 IID = Intrinsic::x86_aesencwide128kl;
17671 BlockName = "aesencwide128kl";
17672 break;
17673 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17674 IID = Intrinsic::x86_aesdecwide128kl;
17675 BlockName = "aesdecwide128kl";
17676 break;
17677 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17678 IID = Intrinsic::x86_aesencwide256kl;
17679 BlockName = "aesencwide256kl";
17680 break;
17681 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17682 IID = Intrinsic::x86_aesdecwide256kl;
17683 BlockName = "aesdecwide256kl";
17684 break;
17685 }
17686
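// Build the intrinsic operand list: the key handle (Ops[2]) followed by
// eight 128-bit data blocks loaded from the input array (Ops[1]).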
17687 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17688 Value *InOps[9];
17689 InOps[0] = Ops[2];
17690 for (int i = 0; i != 8; ++i) {
17691 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17692 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17693 }
17694
17695 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17696
17697 BasicBlock *NoError =
17698 createBasicBlock(BlockName + "_no_error", this->CurFn);
17699 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17700 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17701
17702 Value *Ret = Builder.CreateExtractValue(Call, 0);
17703 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17704 Builder.CreateCondBr(Succ, NoError, Error);
17705
17706 Builder.SetInsertPoint(NoError);
17707 for (int i = 0; i != 8; ++i) {
17708 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17709 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17710 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17711 }
17712 Builder.CreateBr(End);
17713
17714 Builder.SetInsertPoint(Error);
17715 for (int i = 0; i != 8; ++i) {
17716 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17717 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17718 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17719 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17720 }
17721 Builder.CreateBr(End);
17722
17723 Builder.SetInsertPoint(End);
17724 return Builder.CreateExtractValue(Call, 0);
17725 }
17726 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17727 IsConjFMA = true;
17728 [[fallthrough]];
17729 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17730 Intrinsic::ID IID = IsConjFMA
17731 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17732 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17733 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17734 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17735 }
17736 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17737 IsConjFMA = true;
17738 [[fallthrough]];
17739 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17740 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17741 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17742 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17743 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17744 }
17745 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17746 IsConjFMA = true;
17747 [[fallthrough]];
17748 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17749 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17750 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17751 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17752 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17753 return EmitX86Select(*this, And, Call, Ops[0]);
17754 }
17755 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17756 IsConjFMA = true;
17757 [[fallthrough]];
17758 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17759 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17760 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17761 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17762 static constexpr int Mask[] = {0, 5, 6, 7};
17763 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17764 }
17765 case X86::BI__builtin_ia32_prefetchi:
17766 return Builder.CreateCall(
17767 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17768 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17769 llvm::ConstantInt::get(Int32Ty, 0)});
17770 }
17771}
17772
17773Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17774 const CallExpr *E) {
17775 // Do not emit the builtin arguments directly as arguments of a function call,
17776 // because the evaluation order of function arguments is unspecified in C++.
17777 // This is important when testing, to ensure the arguments are emitted in the
17778 // same order every time. E.g.:
17779 // Instead of:
17780 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17781 // EmitScalarExpr(E->getArg(1)), "swdiv");
17782 // Use:
17783 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17784 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17785 // return Builder.CreateFDiv(Op0, Op1, "swdiv");
17786
17787 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17788
17789#include "llvm/TargetParser/PPCTargetParser.def"
17790 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17791 unsigned Mask, CmpInst::Predicate CompOp,
17792 unsigned OpValue) -> Value * {
17793 if (SupportMethod == BUILTIN_PPC_FALSE)
17794 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17795
17796 if (SupportMethod == BUILTIN_PPC_TRUE)
17797 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17798
17799 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17800
17801 llvm::Value *FieldValue = nullptr;
17802 if (SupportMethod == USE_SYS_CONF) {
17803 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17804 llvm::Constant *SysConf =
17805 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17806
17807 // Grab the appropriate field from _system_configuration.
17808 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17809 ConstantInt::get(Int32Ty, FieldIdx)};
17810
17811 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17812 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17813 CharUnits::fromQuantity(4));
17814 } else if (SupportMethod == SYS_CALL) {
17815 llvm::FunctionType *FTy =
17816 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17817 llvm::FunctionCallee Func =
17818 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17819
17820 FieldValue =
17821 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17822 }
17823 assert(FieldValue &&
17824 "SupportMethod value is not defined in PPCTargetParser.def.");
17825
17826 if (Mask)
17827 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17828
17829 llvm::Type *ValueType = FieldValue->getType();
17830 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17831 assert(
17832 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17833 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17834
17835 return Builder.CreateICmp(
17836 CompOp, FieldValue,
17837 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17838 };
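// For illustration (a rough sketch, not verbatim output): when a feature is
// checked via USE_SYS_CONF, the lambda above produces approximately
//   %fld = load i32 from field <FieldIdx> of @_system_configuration
//   %fld = and i32 %fld, <Mask>            ; only when a mask is supplied
//   %res = icmp <CompOp> i32 %fld, <OpValue>
// whereas for SYS_CALL the field value is obtained by calling
// getsystemcfg(<FieldIdx>) instead of loading it directly.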
17839
17840 switch (BuiltinID) {
17841 default: return nullptr;
17842
17843 case Builtin::BI__builtin_cpu_is: {
17844 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17845 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17846 llvm::Triple Triple = getTarget().getTriple();
17847
17848 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17849 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17850
17851 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17852 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17853#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17854 AIXID) \
17855 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17856#include "llvm/TargetParser/PPCTargetParser.def"
17857 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17858 BUILTIN_PPC_UNSUPPORTED, 0}));
17859
17860 if (Triple.isOSAIX()) {
17861 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17862 "Invalid CPU name. Missed by SemaChecking?");
17863 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17864 ICmpInst::ICMP_EQ, AIXIDValue);
17865 }
17866
17867 assert(Triple.isOSLinux() &&
17868 "__builtin_cpu_is() is only supported for AIX and Linux.");
17869
17870 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17871 "Invalid CPU name. Missed by SemaChecking?");
17872
17873 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17874 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17875
17876 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17877 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17878 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17879 return Builder.CreateICmpEQ(TheCall,
17880 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17881 }
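// For illustration (an assumed usage, sketched roughly): on Linux, a call
// such as __builtin_cpu_is("power9") lowers to a fixed-address load of the
// CPUID word followed by a compare:
//   %id  = call i32 @llvm.ppc.fixed.addr.ld(i32 <PPC_FAWORD_CPUID>)
//   %cmp = icmp eq i32 %id, <LinuxIDValue for "power9">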
17882 case Builtin::BI__builtin_cpu_supports: {
17883 llvm::Triple Triple = getTarget().getTriple();
17884 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17885 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17886 if (Triple.isOSAIX()) {
17887 unsigned SupportMethod, FieldIdx, Mask, Value;
17888 CmpInst::Predicate CompOp;
17889 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17890 unsigned>
17891 CPUSupportType;
17892 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17893 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17894#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17895 VALUE) \
17896 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17897#include "llvm/TargetParser/PPCTargetParser.def"
17898 .Default({BUILTIN_PPC_FALSE, 0, 0,
17899 CmpInst::Predicate(), 0}));
17900 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17901 Value);
17902 }
17903
17904 assert(Triple.isOSLinux() &&
17905 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17906 unsigned FeatureWord;
17907 unsigned BitMask;
17908 std::tie(FeatureWord, BitMask) =
17909 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17910#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17911 .Case(Name, {FA_WORD, Bitmask})
17912#include "llvm/TargetParser/PPCTargetParser.def"
17913 .Default({0, 0});
17914 if (!BitMask)
17915 return Builder.getFalse();
17916 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17917 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17918 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17919 Value *Mask =
17920 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17921 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17922#undef PPC_FAWORD_HWCAP
17923#undef PPC_FAWORD_HWCAP2
17924#undef PPC_FAWORD_CPUID
17925 }
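// For illustration (an assumed usage, sketched roughly): on Linux,
// __builtin_cpu_supports("vsx") similarly loads the selected hwcap word at a
// fixed address and tests the feature bit:
//   %w    = call i32 @llvm.ppc.fixed.addr.ld(i32 <FeatureWord>)
//   %bits = and i32 %w, <BitMask>
//   %res  = icmp ne i32 %bits, 0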
17926
17927 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17928 // call __builtin_readcyclecounter.
17929 case PPC::BI__builtin_ppc_get_timebase:
17930 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17931
17932 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17933 case PPC::BI__builtin_altivec_lvx:
17934 case PPC::BI__builtin_altivec_lvxl:
17935 case PPC::BI__builtin_altivec_lvebx:
17936 case PPC::BI__builtin_altivec_lvehx:
17937 case PPC::BI__builtin_altivec_lvewx:
17938 case PPC::BI__builtin_altivec_lvsl:
17939 case PPC::BI__builtin_altivec_lvsr:
17940 case PPC::BI__builtin_vsx_lxvd2x:
17941 case PPC::BI__builtin_vsx_lxvw4x:
17942 case PPC::BI__builtin_vsx_lxvd2x_be:
17943 case PPC::BI__builtin_vsx_lxvw4x_be:
17944 case PPC::BI__builtin_vsx_lxvl:
17945 case PPC::BI__builtin_vsx_lxvll:
17946 {
17947 SmallVector<Value *, 2> Ops;
17948 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17949 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17950 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17951 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17952 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17953 Ops.pop_back();
17954 }
17955
17956 switch (BuiltinID) {
17957 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17958 case PPC::BI__builtin_altivec_lvx:
17959 ID = Intrinsic::ppc_altivec_lvx;
17960 break;
17961 case PPC::BI__builtin_altivec_lvxl:
17962 ID = Intrinsic::ppc_altivec_lvxl;
17963 break;
17964 case PPC::BI__builtin_altivec_lvebx:
17965 ID = Intrinsic::ppc_altivec_lvebx;
17966 break;
17967 case PPC::BI__builtin_altivec_lvehx:
17968 ID = Intrinsic::ppc_altivec_lvehx;
17969 break;
17970 case PPC::BI__builtin_altivec_lvewx:
17971 ID = Intrinsic::ppc_altivec_lvewx;
17972 break;
17973 case PPC::BI__builtin_altivec_lvsl:
17974 ID = Intrinsic::ppc_altivec_lvsl;
17975 break;
17976 case PPC::BI__builtin_altivec_lvsr:
17977 ID = Intrinsic::ppc_altivec_lvsr;
17978 break;
17979 case PPC::BI__builtin_vsx_lxvd2x:
17980 ID = Intrinsic::ppc_vsx_lxvd2x;
17981 break;
17982 case PPC::BI__builtin_vsx_lxvw4x:
17983 ID = Intrinsic::ppc_vsx_lxvw4x;
17984 break;
17985 case PPC::BI__builtin_vsx_lxvd2x_be:
17986 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17987 break;
17988 case PPC::BI__builtin_vsx_lxvw4x_be:
17989 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17990 break;
17991 case PPC::BI__builtin_vsx_lxvl:
17992 ID = Intrinsic::ppc_vsx_lxvl;
17993 break;
17994 case PPC::BI__builtin_vsx_lxvll:
17995 ID = Intrinsic::ppc_vsx_lxvll;
17996 break;
17997 }
17998 llvm::Function *F = CGM.getIntrinsic(ID);
17999 return Builder.CreateCall(F, Ops, "");
18000 }
18001
18002 // vec_st, vec_xst_be
18003 case PPC::BI__builtin_altivec_stvx:
18004 case PPC::BI__builtin_altivec_stvxl:
18005 case PPC::BI__builtin_altivec_stvebx:
18006 case PPC::BI__builtin_altivec_stvehx:
18007 case PPC::BI__builtin_altivec_stvewx:
18008 case PPC::BI__builtin_vsx_stxvd2x:
18009 case PPC::BI__builtin_vsx_stxvw4x:
18010 case PPC::BI__builtin_vsx_stxvd2x_be:
18011 case PPC::BI__builtin_vsx_stxvw4x_be:
18012 case PPC::BI__builtin_vsx_stxvl:
18013 case PPC::BI__builtin_vsx_stxvll:
18014 {
18015 SmallVector<Value *, 3> Ops;
18016 Ops.push_back(EmitScalarExpr(E->getArg(0)));
18017 Ops.push_back(EmitScalarExpr(E->getArg(1)));
18018 Ops.push_back(EmitScalarExpr(E->getArg(2)));
18019 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
18020 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
18021 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18022 Ops.pop_back();
18023 }
18024
18025 switch (BuiltinID) {
18026 default: llvm_unreachable("Unsupported st intrinsic!");
18027 case PPC::BI__builtin_altivec_stvx:
18028 ID = Intrinsic::ppc_altivec_stvx;
18029 break;
18030 case PPC::BI__builtin_altivec_stvxl:
18031 ID = Intrinsic::ppc_altivec_stvxl;
18032 break;
18033 case PPC::BI__builtin_altivec_stvebx:
18034 ID = Intrinsic::ppc_altivec_stvebx;
18035 break;
18036 case PPC::BI__builtin_altivec_stvehx:
18037 ID = Intrinsic::ppc_altivec_stvehx;
18038 break;
18039 case PPC::BI__builtin_altivec_stvewx:
18040 ID = Intrinsic::ppc_altivec_stvewx;
18041 break;
18042 case PPC::BI__builtin_vsx_stxvd2x:
18043 ID = Intrinsic::ppc_vsx_stxvd2x;
18044 break;
18045 case PPC::BI__builtin_vsx_stxvw4x:
18046 ID = Intrinsic::ppc_vsx_stxvw4x;
18047 break;
18048 case PPC::BI__builtin_vsx_stxvd2x_be:
18049 ID = Intrinsic::ppc_vsx_stxvd2x_be;
18050 break;
18051 case PPC::BI__builtin_vsx_stxvw4x_be:
18052 ID = Intrinsic::ppc_vsx_stxvw4x_be;
18053 break;
18054 case PPC::BI__builtin_vsx_stxvl:
18055 ID = Intrinsic::ppc_vsx_stxvl;
18056 break;
18057 case PPC::BI__builtin_vsx_stxvll:
18058 ID = Intrinsic::ppc_vsx_stxvll;
18059 break;
18060 }
18061 llvm::Function *F = CGM.getIntrinsic(ID);
18062 return Builder.CreateCall(F, Ops, "");
18063 }
18064 case PPC::BI__builtin_vsx_ldrmb: {
18065 // Essentially boils down to performing an unaligned VMX load sequence so
18066 // as to avoid crossing a page boundary and then shuffling the elements
18067 // into the right side of the vector register.
18068 Value *Op0 = EmitScalarExpr(E->getArg(0));
18069 Value *Op1 = EmitScalarExpr(E->getArg(1));
18070 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
18071 llvm::Type *ResTy = ConvertType(E->getType());
18072 bool IsLE = getTarget().isLittleEndian();
18073
18074 // If the user wants the entire vector, just load the entire vector.
18075 if (NumBytes == 16) {
18076 Value *LD =
18077 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
18078 if (!IsLE)
18079 return LD;
18080
18081 // Reverse the bytes on LE.
18082 SmallVector<int, 16> RevMask;
18083 for (int Idx = 0; Idx < 16; Idx++)
18084 RevMask.push_back(15 - Idx);
18085 return Builder.CreateShuffleVector(LD, LD, RevMask);
18086 }
18087
18088 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
18089 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
18090 : Intrinsic::ppc_altivec_lvsl);
18091 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
18092 Value *HiMem = Builder.CreateGEP(
18093 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
18094 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
18095 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
18096 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
18097
18098 Op0 = IsLE ? HiLd : LoLd;
18099 Op1 = IsLE ? LoLd : HiLd;
18100 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
18101 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
18102
18103 if (IsLE) {
18104 SmallVector<int, 16> Consts;
18105 for (int Idx = 0; Idx < 16; Idx++) {
18106 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
18107 : 16 - (NumBytes - Idx);
18108 Consts.push_back(Val);
18109 }
18110 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
18111 Zero, Consts);
18112 }
18113 SmallVector<Constant *, 16> Consts;
18114 for (int Idx = 0; Idx < 16; Idx++)
18115 Consts.push_back(Builder.getInt8(NumBytes + Idx));
18116 Value *Mask2 = ConstantVector::get(Consts);
18117 return Builder.CreateBitCast(
18118 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
18119 }
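// For illustration (a rough sketch of the sequence above): a partial load
// such as __builtin_vsx_ldrmb(p, 7) emits two lvx loads (lvx ignores the low
// four address bits), one at p ("ld.lo") and one at p + 6 ("ld.hi"), so no
// byte beyond the aligned quadword containing the last requested byte is
// touched. The two halves are merged with vperm using the lvsl/lvsr mask and
// then combined with zero so that only the 7 requested bytes land in the
// correct end of the result vector.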
18120 case PPC::BI__builtin_vsx_strmb: {
18121 Value *Op0 = EmitScalarExpr(E->getArg(0));
18122 Value *Op1 = EmitScalarExpr(E->getArg(1));
18123 Value *Op2 = EmitScalarExpr(E->getArg(2));
18124 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
18125 bool IsLE = getTarget().isLittleEndian();
18126 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
18127 // If storing the whole vector, store it directly on BE; on LE, reverse
18128 // the bytes first and then store.
18129 if (Width == 16) {
18130 Value *StVec = Op2;
18131 if (IsLE) {
18132 SmallVector<int, 16> RevMask;
18133 for (int Idx = 0; Idx < 16; Idx++)
18134 RevMask.push_back(15 - Idx);
18135 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
18136 }
18137 return Builder.CreateStore(
18138 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
18139 }
18140 auto *ConvTy = Int64Ty;
18141 unsigned NumElts = 0;
18142 switch (Width) {
18143 default:
18144 llvm_unreachable("width for stores must be a power of 2");
18145 case 8:
18146 ConvTy = Int64Ty;
18147 NumElts = 2;
18148 break;
18149 case 4:
18150 ConvTy = Int32Ty;
18151 NumElts = 4;
18152 break;
18153 case 2:
18154 ConvTy = Int16Ty;
18155 NumElts = 8;
18156 break;
18157 case 1:
18158 ConvTy = Int8Ty;
18159 NumElts = 16;
18160 break;
18161 }
18162 Value *Vec = Builder.CreateBitCast(
18163 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
18164 Value *Ptr =
18165 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
18166 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
18167 if (IsLE && Width > 1) {
18168 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
18169 Elt = Builder.CreateCall(F, Elt);
18170 }
18171 return Builder.CreateStore(
18172 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
18173 };
18174 unsigned Stored = 0;
18175 unsigned RemainingBytes = NumBytes;
18176 Value *Result;
18177 if (NumBytes == 16)
18178 return StoreSubVec(16, 0, 0);
18179 if (NumBytes >= 8) {
18180 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
18181 RemainingBytes -= 8;
18182 Stored += 8;
18183 }
18184 if (RemainingBytes >= 4) {
18185 Result = StoreSubVec(4, NumBytes - Stored - 4,
18186 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
18187 RemainingBytes -= 4;
18188 Stored += 4;
18189 }
18190 if (RemainingBytes >= 2) {
18191 Result = StoreSubVec(2, NumBytes - Stored - 2,
18192 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
18193 RemainingBytes -= 2;
18194 Stored += 2;
18195 }
18196 if (RemainingBytes)
18197 Result =
18198 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18199 return Result;
18200 }
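// Worked example (illustrative): for __builtin_vsx_strmb(p, 13, v) the calls
// to StoreSubVec decompose the 13 bytes as 8 + 4 + 1:
//   StoreSubVec(8, 5, ...) stores bytes p[5..12]
//   StoreSubVec(4, 1, ...) stores bytes p[1..4]
//   StoreSubVec(1, 0, ...) stores byte  p[0]
// The element numbers passed in differ between LE and BE so that the same
// source bytes of the vector are written in either endianness.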
18201 // Square root
18202 case PPC::BI__builtin_vsx_xvsqrtsp:
18203 case PPC::BI__builtin_vsx_xvsqrtdp: {
18204 llvm::Type *ResultType = ConvertType(E->getType());
18205 Value *X = EmitScalarExpr(E->getArg(0));
18206 if (Builder.getIsFPConstrained()) {
18207 llvm::Function *F = CGM.getIntrinsic(
18208 Intrinsic::experimental_constrained_sqrt, ResultType);
18209 return Builder.CreateConstrainedFPCall(F, X);
18210 } else {
18211 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18212 return Builder.CreateCall(F, X);
18213 }
18214 }
18215 // Count leading zeros
18216 case PPC::BI__builtin_altivec_vclzb:
18217 case PPC::BI__builtin_altivec_vclzh:
18218 case PPC::BI__builtin_altivec_vclzw:
18219 case PPC::BI__builtin_altivec_vclzd: {
18220 llvm::Type *ResultType = ConvertType(E->getType());
18221 Value *X = EmitScalarExpr(E->getArg(0));
18222 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18223 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18224 return Builder.CreateCall(F, {X, Undef});
18225 }
18226 case PPC::BI__builtin_altivec_vctzb:
18227 case PPC::BI__builtin_altivec_vctzh:
18228 case PPC::BI__builtin_altivec_vctzw:
18229 case PPC::BI__builtin_altivec_vctzd: {
18230 llvm::Type *ResultType = ConvertType(E->getType());
18231 Value *X = EmitScalarExpr(E->getArg(0));
18232 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18233 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18234 return Builder.CreateCall(F, {X, Undef});
18235 }
18236 case PPC::BI__builtin_altivec_vinsd:
18237 case PPC::BI__builtin_altivec_vinsw:
18238 case PPC::BI__builtin_altivec_vinsd_elt:
18239 case PPC::BI__builtin_altivec_vinsw_elt: {
18240 llvm::Type *ResultType = ConvertType(E->getType());
18241 Value *Op0 = EmitScalarExpr(E->getArg(0));
18242 Value *Op1 = EmitScalarExpr(E->getArg(1));
18243 Value *Op2 = EmitScalarExpr(E->getArg(2));
18244
18245 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18246 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18247
18248 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18249 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18250
18251 // The third argument must be a compile time constant.
18252 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18253 assert(ArgCI &&
18254 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18255
18256 // The valid range for the third argument depends on the input type and
18257 // the builtin called.
18258 int ValidMaxValue = 0;
18259 if (IsUnaligned)
18260 ValidMaxValue = (Is32bit) ? 12 : 8;
18261 else
18262 ValidMaxValue = (Is32bit) ? 3 : 1;
18263
18264 // Get value of third argument.
18265 int64_t ConstArg = ArgCI->getSExtValue();
18266
18267 // Compose range checking error message.
18268 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18269 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18270 RangeErrMsg += " is outside of the valid range [0, ";
18271 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18272
18273 // Issue error if third argument is not within the valid range.
18274 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18275 CGM.Error(E->getExprLoc(), RangeErrMsg);
18276
18277 // Input to vec_replace_elt is an element index, convert to byte index.
18278 if (!IsUnaligned) {
18279 ConstArg *= Is32bit ? 4 : 8;
18280 // Fix the constant according to endianness.
18281 if (getTarget().isLittleEndian())
18282 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18283 }
18284
18285 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18286 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18287 // Casting input to vector int as per intrinsic definition.
18288 Op0 =
18289 Is32bit
18290 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18291 : Builder.CreateBitCast(Op0,
18292 llvm::FixedVectorType::get(Int64Ty, 2));
18293 return Builder.CreateBitCast(
18294 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18295 }
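// Worked example (illustrative): for the element-indexed forms the element
// index is first scaled to a byte offset and then flipped on little-endian.
// E.g. vinsw_elt with element index 2 on LE: byte index = 2 * 4 = 8, then
// 12 - 8 = 4, so the word is inserted at byte offset 4 of the target register.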
18296 case PPC::BI__builtin_altivec_vadduqm:
18297 case PPC::BI__builtin_altivec_vsubuqm: {
18298 Value *Op0 = EmitScalarExpr(E->getArg(0));
18299 Value *Op1 = EmitScalarExpr(E->getArg(1));
18300 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18301 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18302 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18303 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18304 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18305 else
18306 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18307 }
18308 case PPC::BI__builtin_altivec_vaddcuq_c:
18309 case PPC::BI__builtin_altivec_vsubcuq_c: {
18310 SmallVector<Value *, 2> Ops;
18311 Value *Op0 = EmitScalarExpr(E->getArg(0));
18312 Value *Op1 = EmitScalarExpr(E->getArg(1));
18313 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18314 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18315 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18316 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18317 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18318 ? Intrinsic::ppc_altivec_vaddcuq
18319 : Intrinsic::ppc_altivec_vsubcuq;
18320 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18321 }
18322 case PPC::BI__builtin_altivec_vaddeuqm_c:
18323 case PPC::BI__builtin_altivec_vaddecuq_c:
18324 case PPC::BI__builtin_altivec_vsubeuqm_c:
18325 case PPC::BI__builtin_altivec_vsubecuq_c: {
18326 SmallVector<Value *, 3> Ops;
18327 Value *Op0 = EmitScalarExpr(E->getArg(0));
18328 Value *Op1 = EmitScalarExpr(E->getArg(1));
18329 Value *Op2 = EmitScalarExpr(E->getArg(2));
18330 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18331 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18332 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18333 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18334 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18335 switch (BuiltinID) {
18336 default:
18337 llvm_unreachable("Unsupported intrinsic!");
18338 case PPC::BI__builtin_altivec_vaddeuqm_c:
18339 ID = Intrinsic::ppc_altivec_vaddeuqm;
18340 break;
18341 case PPC::BI__builtin_altivec_vaddecuq_c:
18342 ID = Intrinsic::ppc_altivec_vaddecuq;
18343 break;
18344 case PPC::BI__builtin_altivec_vsubeuqm_c:
18345 ID = Intrinsic::ppc_altivec_vsubeuqm;
18346 break;
18347 case PPC::BI__builtin_altivec_vsubecuq_c:
18348 ID = Intrinsic::ppc_altivec_vsubecuq;
18349 break;
18350 }
18351 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18352 }
18353 case PPC::BI__builtin_ppc_rldimi:
18354 case PPC::BI__builtin_ppc_rlwimi: {
18355 Value *Op0 = EmitScalarExpr(E->getArg(0));
18356 Value *Op1 = EmitScalarExpr(E->getArg(1));
18357 Value *Op2 = EmitScalarExpr(E->getArg(2));
18358 Value *Op3 = EmitScalarExpr(E->getArg(3));
18359 // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
18360 // before isel to leverage the peephole and avoid legalization effort.
18361 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18362 !getTarget().getTriple().isPPC64()) {
18363 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18364 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18365 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18366 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18367 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18368 }
18369 return Builder.CreateCall(
18370 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18371 ? Intrinsic::ppc_rldimi
18372 : Intrinsic::ppc_rlwimi),
18373 {Op0, Op1, Op2, Op3});
18374 }
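// For illustration: the 32-bit-target expansion of rldimi above implements
// the usual rotate-and-insert-under-mask semantics,
//   result = (rotl64(Op0, Op2) & Op3) | (Op1 & ~Op3),
// using llvm.fshl with both data operands equal to Op0 to obtain the rotate.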
18375 case PPC::BI__builtin_ppc_rlwnm: {
18376 Value *Op0 = EmitScalarExpr(E->getArg(0));
18377 Value *Op1 = EmitScalarExpr(E->getArg(1));
18378 Value *Op2 = EmitScalarExpr(E->getArg(2));
18379 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18380 {Op0, Op1, Op2});
18381 }
18382 case PPC::BI__builtin_ppc_poppar4:
18383 case PPC::BI__builtin_ppc_poppar8: {
18384 Value *Op0 = EmitScalarExpr(E->getArg(0));
18385 llvm::Type *ArgType = Op0->getType();
18386 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18387 Value *Tmp = Builder.CreateCall(F, Op0);
18388
18389 llvm::Type *ResultType = ConvertType(E->getType());
18390 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18391 if (Result->getType() != ResultType)
18392 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18393 "cast");
18394 return Result;
18395 }
18396 case PPC::BI__builtin_ppc_cmpb: {
18397 Value *Op0 = EmitScalarExpr(E->getArg(0));
18398 Value *Op1 = EmitScalarExpr(E->getArg(1));
18399 if (getTarget().getTriple().isPPC64()) {
18400 Function *F =
18401 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18402 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18403 }
18404 // For 32 bit, emit the code as below:
18405 // %conv = trunc i64 %a to i32
18406 // %conv1 = trunc i64 %b to i32
18407 // %shr = lshr i64 %a, 32
18408 // %conv2 = trunc i64 %shr to i32
18409 // %shr3 = lshr i64 %b, 32
18410 // %conv4 = trunc i64 %shr3 to i32
18411 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18412 // %conv5 = zext i32 %0 to i64
18413 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18414 // %conv614 = zext i32 %1 to i64
18415 // %shl = shl nuw i64 %conv614, 32
18416 // %or = or i64 %shl, %conv5
18417 // ret i64 %or
18418 Function *F =
18419 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18420 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18421 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18422 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18423 Value *ArgOneHi =
18424 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18425 Value *ArgTwoHi =
18426 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18427 Value *ResLo = Builder.CreateZExt(
18428 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18429 Value *ResHiShift = Builder.CreateZExt(
18430 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18431 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18432 return Builder.CreateOr(ResLo, ResHi);
18433 }
18434 // Copy sign
18435 case PPC::BI__builtin_vsx_xvcpsgnsp:
18436 case PPC::BI__builtin_vsx_xvcpsgndp: {
18437 llvm::Type *ResultType = ConvertType(E->getType());
18438 Value *X = EmitScalarExpr(E->getArg(0));
18439 Value *Y = EmitScalarExpr(E->getArg(1));
18440 ID = Intrinsic::copysign;
18441 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18442 return Builder.CreateCall(F, {X, Y});
18443 }
18444 // Rounding/truncation
18445 case PPC::BI__builtin_vsx_xvrspip:
18446 case PPC::BI__builtin_vsx_xvrdpip:
18447 case PPC::BI__builtin_vsx_xvrdpim:
18448 case PPC::BI__builtin_vsx_xvrspim:
18449 case PPC::BI__builtin_vsx_xvrdpi:
18450 case PPC::BI__builtin_vsx_xvrspi:
18451 case PPC::BI__builtin_vsx_xvrdpic:
18452 case PPC::BI__builtin_vsx_xvrspic:
18453 case PPC::BI__builtin_vsx_xvrdpiz:
18454 case PPC::BI__builtin_vsx_xvrspiz: {
18455 llvm::Type *ResultType = ConvertType(E->getType());
18456 Value *X = EmitScalarExpr(E->getArg(0));
18457 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18458 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18459 ID = Builder.getIsFPConstrained()
18460 ? Intrinsic::experimental_constrained_floor
18461 : Intrinsic::floor;
18462 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18463 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18464 ID = Builder.getIsFPConstrained()
18465 ? Intrinsic::experimental_constrained_round
18466 : Intrinsic::round;
18467 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18468 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18469 ID = Builder.getIsFPConstrained()
18470 ? Intrinsic::experimental_constrained_rint
18471 : Intrinsic::rint;
18472 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18473 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18474 ID = Builder.getIsFPConstrained()
18475 ? Intrinsic::experimental_constrained_ceil
18476 : Intrinsic::ceil;
18477 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18478 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18479 ID = Builder.getIsFPConstrained()
18480 ? Intrinsic::experimental_constrained_trunc
18481 : Intrinsic::trunc;
18482 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18483 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18484 : Builder.CreateCall(F, X);
18485 }
18486
18487 // Absolute value
18488 case PPC::BI__builtin_vsx_xvabsdp:
18489 case PPC::BI__builtin_vsx_xvabssp: {
18490 llvm::Type *ResultType = ConvertType(E->getType());
18491 Value *X = EmitScalarExpr(E->getArg(0));
18492 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18493 return Builder.CreateCall(F, X);
18494 }
18495
18496 // Fastmath by default
18497 case PPC::BI__builtin_ppc_recipdivf:
18498 case PPC::BI__builtin_ppc_recipdivd:
18499 case PPC::BI__builtin_ppc_rsqrtf:
18500 case PPC::BI__builtin_ppc_rsqrtd: {
18501 FastMathFlags FMF = Builder.getFastMathFlags();
18502 Builder.getFastMathFlags().setFast();
18503 llvm::Type *ResultType = ConvertType(E->getType());
18504 Value *X = EmitScalarExpr(E->getArg(0));
18505
18506 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18507 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18508 Value *Y = EmitScalarExpr(E->getArg(1));
18509 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18510 Builder.getFastMathFlags() &= (FMF);
18511 return FDiv;
18512 }
18513 auto *One = ConstantFP::get(ResultType, 1.0);
18514 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18515 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18516 Builder.getFastMathFlags() &= (FMF);
18517 return FDiv;
18518 }
18519 case PPC::BI__builtin_ppc_alignx: {
18520 Value *Op0 = EmitScalarExpr(E->getArg(0));
18521 Value *Op1 = EmitScalarExpr(E->getArg(1));
18522 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18523 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18524 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18525 llvm::Value::MaximumAlignment);
18526
18527 emitAlignmentAssumption(Op1, E->getArg(1),
18528 /*The expr loc is sufficient.*/ SourceLocation(),
18529 AlignmentCI, nullptr);
18530 return Op1;
18531 }
18532 case PPC::BI__builtin_ppc_rdlam: {
18533 Value *Op0 = EmitScalarExpr(E->getArg(0));
18534 Value *Op1 = EmitScalarExpr(E->getArg(1));
18535 Value *Op2 = EmitScalarExpr(E->getArg(2));
18536 llvm::Type *Ty = Op0->getType();
18537 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18538 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18539 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18540 return Builder.CreateAnd(Rotate, Op2);
18541 }
18542 case PPC::BI__builtin_ppc_load2r: {
18543 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18544 Value *Op0 = EmitScalarExpr(E->getArg(0));
18545 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18546 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18547 }
18548 // FMA variations
18549 case PPC::BI__builtin_ppc_fnmsub:
18550 case PPC::BI__builtin_ppc_fnmsubs:
18551 case PPC::BI__builtin_vsx_xvmaddadp:
18552 case PPC::BI__builtin_vsx_xvmaddasp:
18553 case PPC::BI__builtin_vsx_xvnmaddadp:
18554 case PPC::BI__builtin_vsx_xvnmaddasp:
18555 case PPC::BI__builtin_vsx_xvmsubadp:
18556 case PPC::BI__builtin_vsx_xvmsubasp:
18557 case PPC::BI__builtin_vsx_xvnmsubadp:
18558 case PPC::BI__builtin_vsx_xvnmsubasp: {
18559 llvm::Type *ResultType = ConvertType(E->getType());
18560 Value *X = EmitScalarExpr(E->getArg(0));
18561 Value *Y = EmitScalarExpr(E->getArg(1));
18562 Value *Z = EmitScalarExpr(E->getArg(2));
18563 llvm::Function *F;
18564 if (Builder.getIsFPConstrained())
18565 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18566 else
18567 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18568 switch (BuiltinID) {
18569 case PPC::BI__builtin_vsx_xvmaddadp:
18570 case PPC::BI__builtin_vsx_xvmaddasp:
18571 if (Builder.getIsFPConstrained())
18572 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18573 else
18574 return Builder.CreateCall(F, {X, Y, Z});
18575 case PPC::BI__builtin_vsx_xvnmaddadp:
18576 case PPC::BI__builtin_vsx_xvnmaddasp:
18577 if (Builder.getIsFPConstrained())
18578 return Builder.CreateFNeg(
18579 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18580 else
18581 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18582 case PPC::BI__builtin_vsx_xvmsubadp:
18583 case PPC::BI__builtin_vsx_xvmsubasp:
18584 if (Builder.getIsFPConstrained())
18585 return Builder.CreateConstrainedFPCall(
18586 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18587 else
18588 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18589 case PPC::BI__builtin_ppc_fnmsub:
18590 case PPC::BI__builtin_ppc_fnmsubs:
18591 case PPC::BI__builtin_vsx_xvnmsubadp:
18592 case PPC::BI__builtin_vsx_xvnmsubasp:
18593 if (Builder.getIsFPConstrained())
18594 return Builder.CreateFNeg(
18595 Builder.CreateConstrainedFPCall(
18596 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18597 "neg");
18598 else
18599 return Builder.CreateCall(
18600 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18601 }
18602 llvm_unreachable("Unknown FMA operation");
18603 return nullptr; // Suppress no-return warning
18604 }
18605
18606 case PPC::BI__builtin_vsx_insertword: {
18607 Value *Op0 = EmitScalarExpr(E->getArg(0));
18608 Value *Op1 = EmitScalarExpr(E->getArg(1));
18609 Value *Op2 = EmitScalarExpr(E->getArg(2));
18610 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18611
18612 // The third argument is a compile-time constant int. It must be clamped
18613 // to the range [0, 12].
18614 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18615 assert(ArgCI &&
18616 "Third arg to xxinsertw intrinsic must be constant integer");
18617 const int64_t MaxIndex = 12;
18618 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18619
18620 // The builtin semantics don't exactly match the xxinsertw instruction's
18621 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18622 // word from the first argument, and inserts it in the second argument. The
18623 // instruction extracts the word from its second input register and inserts
18624 // it into its first input register, so swap the first and second arguments.
18625 std::swap(Op0, Op1);
18626
18627 // Need to cast the second argument from a vector of unsigned int to a
18628 // vector of long long.
18629 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18630
18631 if (getTarget().isLittleEndian()) {
18632 // Reverse the double words in the vector we will extract from.
18633 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18634 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18635
18636 // Reverse the index.
18637 Index = MaxIndex - Index;
18638 }
18639
18640 // Intrinsic expects the first arg to be a vector of int.
18641 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18642 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18643 return Builder.CreateCall(F, {Op0, Op1, Op2});
18644 }
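// Worked example (illustrative): for __builtin_vsx_insertword with index 4 on
// a little-endian target, the doublewords of the source vector are swapped
// and the index becomes 12 - 4 = 8, so xxinsertw writes the word where the
// user expects despite the instruction's big-endian byte numbering.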
18645
18646 case PPC::BI__builtin_vsx_extractuword: {
18647 Value *Op0 = EmitScalarExpr(E->getArg(0));
18648 Value *Op1 = EmitScalarExpr(E->getArg(1));
18649 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18650
18651 // Intrinsic expects the first argument to be a vector of doublewords.
18652 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18653
18654 // The second argument is a compile time constant int that needs to
18655 // be clamped to the range [0, 12].
18656 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18657 assert(ArgCI &&
18658 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18659 const int64_t MaxIndex = 12;
18660 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18661
18662 if (getTarget().isLittleEndian()) {
18663 // Reverse the index.
18664 Index = MaxIndex - Index;
18665 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18666
18667 // Emit the call, then reverse the double words of the results vector.
18668 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18669
18670 Value *ShuffleCall =
18671 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18672 return ShuffleCall;
18673 } else {
18674 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18675 return Builder.CreateCall(F, {Op0, Op1});
18676 }
18677 }
18678
18679 case PPC::BI__builtin_vsx_xxpermdi: {
18680 Value *Op0 = EmitScalarExpr(E->getArg(0));
18681 Value *Op1 = EmitScalarExpr(E->getArg(1));
18682 Value *Op2 = EmitScalarExpr(E->getArg(2));
18683 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18684 assert(ArgCI && "Third arg must be constant integer!");
18685
18686 unsigned Index = ArgCI->getZExtValue();
18687 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18688 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18689
18690 // Account for endianness by treating this as just a shuffle. So we use the
18691 // same indices for both LE and BE in order to produce expected results in
18692 // both cases.
18693 int ElemIdx0 = (Index & 2) >> 1;
18694 int ElemIdx1 = 2 + (Index & 1);
18695
18696 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18697 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18698 QualType BIRetType = E->getType();
18699 auto RetTy = ConvertType(BIRetType);
18700 return Builder.CreateBitCast(ShuffleCall, RetTy);
18701 }
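// Worked example (illustrative): xxpermdi is emitted as a plain shufflevector
// over the two v2i64 inputs with
//   ElemIdx0 = (Index & 2) >> 1  and  ElemIdx1 = 2 + (Index & 1),
// e.g. Index = 3 yields the mask {1, 3}, i.e. element 1 of each input vector.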
18702
18703 case PPC::BI__builtin_vsx_xxsldwi: {
18704 Value *Op0 = EmitScalarExpr(E->getArg(0));
18705 Value *Op1 = EmitScalarExpr(E->getArg(1));
18706 Value *Op2 = EmitScalarExpr(E->getArg(2));
18707 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18708 assert(ArgCI && "Third argument must be a compile time constant");
18709 unsigned Index = ArgCI->getZExtValue() & 0x3;
18710 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18711 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18712
18713 // Create a shuffle mask
18714 int ElemIdx0;
18715 int ElemIdx1;
18716 int ElemIdx2;
18717 int ElemIdx3;
18718 if (getTarget().isLittleEndian()) {
18719 // Little endian element N comes from element 8+N-Index of the
18720 // concatenated wide vector (of course, using modulo arithmetic on
18721 // the total number of elements).
18722 ElemIdx0 = (8 - Index) % 8;
18723 ElemIdx1 = (9 - Index) % 8;
18724 ElemIdx2 = (10 - Index) % 8;
18725 ElemIdx3 = (11 - Index) % 8;
18726 } else {
18727 // Big endian ElemIdx<N> = Index + N
18728 ElemIdx0 = Index;
18729 ElemIdx1 = Index + 1;
18730 ElemIdx2 = Index + 2;
18731 ElemIdx3 = Index + 3;
18732 }
18733
18734 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18735 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18736 QualType BIRetType = E->getType();
18737 auto RetTy = ConvertType(BIRetType);
18738 return Builder.CreateBitCast(ShuffleCall, RetTy);
18739 }
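// Worked example (illustrative): for __builtin_vsx_xxsldwi with shift count
// Index = 1 the formulas above give the shuffle mask {1, 2, 3, 4} on BE and
// {7, 0, 1, 2} on LE, so both endiannesses observe the same source-level
// semantics.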
18740
18741 case PPC::BI__builtin_pack_vector_int128: {
18742 Value *Op0 = EmitScalarExpr(E->getArg(0));
18743 Value *Op1 = EmitScalarExpr(E->getArg(1));
18744 bool isLittleEndian = getTarget().isLittleEndian();
18745 Value *PoisonValue =
18746 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18747 Value *Res = Builder.CreateInsertElement(
18748 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18749 Res = Builder.CreateInsertElement(Res, Op1,
18750 (uint64_t)(isLittleEndian ? 0 : 1));
18751 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18752 }
18753
18754 case PPC::BI__builtin_unpack_vector_int128: {
18755 Value *Op0 = EmitScalarExpr(E->getArg(0));
18756 Value *Op1 = EmitScalarExpr(E->getArg(1));
18757 ConstantInt *Index = cast<ConstantInt>(Op1);
18758 Value *Unpacked = Builder.CreateBitCast(
18759 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18760
18761 if (getTarget().isLittleEndian())
18762 Index =
18763 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18764
18765 return Builder.CreateExtractElement(Unpacked, Index);
18766 }
18767
18768 case PPC::BI__builtin_ppc_sthcx: {
18769 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18770 Value *Op0 = EmitScalarExpr(E->getArg(0));
18771 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18772 return Builder.CreateCall(F, {Op0, Op1});
18773 }
18774
18775 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18776 // Some of the MMA instructions accumulate their result into an existing
18777 // accumulator whereas the others generate a new accumulator. So we need to
18778 // use custom code generation to expand a builtin call with a pointer to a
18779 // load (if the corresponding instruction accumulates its result) followed by
18780 // the call to the intrinsic and a store of the result.
18781#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18782 case PPC::BI__builtin_##Name:
18783#include "clang/Basic/BuiltinsPPC.def"
18784 {
18785 SmallVector<Value *, 4> Ops;
18786 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18787 if (E->getArg(i)->getType()->isArrayType())
18788 Ops.push_back(
18789 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18790 else
18791 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18792 // The first argument of these builtins is a pointer used to store their
18793 // result. However, the LLVM intrinsics return their result in multiple
18794 // return values. So, here we emit code extracting these values from the
18795 // intrinsic results and storing them using that pointer.
18796 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18797 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18798 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18799 unsigned NumVecs = 2;
18800 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18801 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18802 NumVecs = 4;
18803 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18804 }
18805 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18806 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18807 Value *Vec = Builder.CreateLoad(Addr);
18808 Value *Call = Builder.CreateCall(F, {Vec});
18809 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18810 Value *Ptr = Ops[0];
18811 for (unsigned i=0; i<NumVecs; i++) {
18812 Value *Vec = Builder.CreateExtractValue(Call, i);
18813 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18814 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18815 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18816 }
18817 return Call;
18818 }
18819 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18820 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18821 // Reverse the order of the operands for LE, so the
18822 // same builtin call can be used on both LE and BE
18823 // without the need for the programmer to swap operands.
18824 // The operands are reversed starting from the second argument,
18825 // the first operand is the pointer to the pair/accumulator
18826 // that is being built.
18827 if (getTarget().isLittleEndian())
18828 std::reverse(Ops.begin() + 1, Ops.end());
18829 }
18830 bool Accumulate;
18831 switch (BuiltinID) {
18832 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18833 case PPC::BI__builtin_##Name: \
18834 ID = Intrinsic::ppc_##Intr; \
18835 Accumulate = Acc; \
18836 break;
18837 #include "clang/Basic/BuiltinsPPC.def"
18838 }
18839 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18840 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18841 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18842 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18843 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18844 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18845 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18846 } else {
18847 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18848 }
18849 Ops.pop_back();
18850 llvm::Function *F = CGM.getIntrinsic(ID);
18851 return Builder.CreateCall(F, Ops, "");
18852 }
18853 SmallVector<Value*, 4> CallOps;
18854 if (Accumulate) {
18855 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18856 Value *Acc = Builder.CreateLoad(Addr);
18857 CallOps.push_back(Acc);
18858 }
18859 for (unsigned i=1; i<Ops.size(); i++)
18860 CallOps.push_back(Ops[i]);
18861 llvm::Function *F = CGM.getIntrinsic(ID);
18862 Value *Call = Builder.CreateCall(F, CallOps);
18863 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18864 }
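// For illustration (a rough sketch): an accumulating MMA builtin such as
// __builtin_mma_xvf32gerpp(&acc, a, b) expands via the code above into
// approximately
//   %acc0 = load <512 x i1>, ptr %accptr
//   %acc1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc0,
//                                                    <16 x i8> %a, <16 x i8> %b)
//   store <512 x i1> %acc1, ptr %accptr
// while non-accumulating forms skip the initial load.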
18865
18866 case PPC::BI__builtin_ppc_compare_and_swap:
18867 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18868 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18869 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18870 Value *OldVal = Builder.CreateLoad(OldValAddr);
18871 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18872 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18873 Value *Op2 = EmitScalarExpr(E->getArg(2));
18874 auto Pair = EmitAtomicCompareExchange(
18875 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18876 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18877 // Unlike c11's atomic_compare_exchange, according to
18878 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18879 // > In either case, the contents of the memory location specified by addr
18880 // > are copied into the memory location specified by old_val_addr.
18881 // But it does not specify whether the store to OldValAddr is atomic or
18882 // which memory order to use. Following XL's codegen, we treat it as a
18883 // normal store.
18884 Value *LoadedVal = Pair.first.getScalarVal();
18885 Builder.CreateStore(LoadedVal, OldValAddr);
18886 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18887 }
18888 case PPC::BI__builtin_ppc_fetch_and_add:
18889 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18890 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18891 llvm::AtomicOrdering::Monotonic);
18892 }
18893 case PPC::BI__builtin_ppc_fetch_and_and:
18894 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18895 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18896 llvm::AtomicOrdering::Monotonic);
18897 }
18898
18899 case PPC::BI__builtin_ppc_fetch_and_or:
18900 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18901 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18902 llvm::AtomicOrdering::Monotonic);
18903 }
18904 case PPC::BI__builtin_ppc_fetch_and_swap:
18905 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18906 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18907 llvm::AtomicOrdering::Monotonic);
18908 }
18909 case PPC::BI__builtin_ppc_ldarx:
18910 case PPC::BI__builtin_ppc_lwarx:
18911 case PPC::BI__builtin_ppc_lharx:
18912 case PPC::BI__builtin_ppc_lbarx:
18913 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18914 case PPC::BI__builtin_ppc_mfspr: {
18915 Value *Op0 = EmitScalarExpr(E->getArg(0));
18916 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18917 ? Int32Ty
18918 : Int64Ty;
18919 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18920 return Builder.CreateCall(F, {Op0});
18921 }
18922 case PPC::BI__builtin_ppc_mtspr: {
18923 Value *Op0 = EmitScalarExpr(E->getArg(0));
18924 Value *Op1 = EmitScalarExpr(E->getArg(1));
18925 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18926 ? Int32Ty
18927 : Int64Ty;
18928 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18929 return Builder.CreateCall(F, {Op0, Op1});
18930 }
18931 case PPC::BI__builtin_ppc_popcntb: {
18932 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18933 llvm::Type *ArgType = ArgValue->getType();
18934 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18935 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18936 }
18937 case PPC::BI__builtin_ppc_mtfsf: {
18938 // The builtin takes a uint32 that needs to be cast to an
18939 // f64 to be passed to the intrinsic.
18940 Value *Op0 = EmitScalarExpr(E->getArg(0));
18941 Value *Op1 = EmitScalarExpr(E->getArg(1));
18942 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18943 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18944 return Builder.CreateCall(F, {Op0, Cast}, "");
18945 }
18946
18947 case PPC::BI__builtin_ppc_swdiv_nochk:
18948 case PPC::BI__builtin_ppc_swdivs_nochk: {
18949 Value *Op0 = EmitScalarExpr(E->getArg(0));
18950 Value *Op1 = EmitScalarExpr(E->getArg(1));
18951 FastMathFlags FMF = Builder.getFastMathFlags();
18952 Builder.getFastMathFlags().setFast();
18953 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18954 Builder.getFastMathFlags() &= (FMF);
18955 return FDiv;
18956 }
18957 case PPC::BI__builtin_ppc_fric:
18958 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18959 *this, E, Intrinsic::rint,
18960 Intrinsic::experimental_constrained_rint))
18961 .getScalarVal();
18962 case PPC::BI__builtin_ppc_frim:
18963 case PPC::BI__builtin_ppc_frims:
18964 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18965 *this, E, Intrinsic::floor,
18966 Intrinsic::experimental_constrained_floor))
18967 .getScalarVal();
18968 case PPC::BI__builtin_ppc_frin:
18969 case PPC::BI__builtin_ppc_frins:
18970 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18971 *this, E, Intrinsic::round,
18972 Intrinsic::experimental_constrained_round))
18973 .getScalarVal();
18974 case PPC::BI__builtin_ppc_frip:
18975 case PPC::BI__builtin_ppc_frips:
18976 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18977 *this, E, Intrinsic::ceil,
18978 Intrinsic::experimental_constrained_ceil))
18979 .getScalarVal();
18980 case PPC::BI__builtin_ppc_friz:
18981 case PPC::BI__builtin_ppc_frizs:
18982 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18983 *this, E, Intrinsic::trunc,
18984 Intrinsic::experimental_constrained_trunc))
18985 .getScalarVal();
18986 case PPC::BI__builtin_ppc_fsqrt:
18987 case PPC::BI__builtin_ppc_fsqrts:
18988 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18989 *this, E, Intrinsic::sqrt,
18990 Intrinsic::experimental_constrained_sqrt))
18991 .getScalarVal();
18992 case PPC::BI__builtin_ppc_test_data_class: {
18993 Value *Op0 = EmitScalarExpr(E->getArg(0));
18994 Value *Op1 = EmitScalarExpr(E->getArg(1));
18995 return Builder.CreateCall(
18996 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18997 {Op0, Op1}, "test_data_class");
18998 }
18999 case PPC::BI__builtin_ppc_maxfe: {
19000 Value *Op0 = EmitScalarExpr(E->getArg(0));
19001 Value *Op1 = EmitScalarExpr(E->getArg(1));
19002 Value *Op2 = EmitScalarExpr(E->getArg(2));
19003 Value *Op3 = EmitScalarExpr(E->getArg(3));
19004 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
19005 {Op0, Op1, Op2, Op3});
19006 }
19007 case PPC::BI__builtin_ppc_maxfl: {
19008 Value *Op0 = EmitScalarExpr(E->getArg(0));
19009 Value *Op1 = EmitScalarExpr(E->getArg(1));
19010 Value *Op2 = EmitScalarExpr(E->getArg(2));
19011 Value *Op3 = EmitScalarExpr(E->getArg(3));
19012 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
19013 {Op0, Op1, Op2, Op3});
19014 }
19015 case PPC::BI__builtin_ppc_maxfs: {
19016 Value *Op0 = EmitScalarExpr(E->getArg(0));
19017 Value *Op1 = EmitScalarExpr(E->getArg(1));
19018 Value *Op2 = EmitScalarExpr(E->getArg(2));
19019 Value *Op3 = EmitScalarExpr(E->getArg(3));
19020 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
19021 {Op0, Op1, Op2, Op3});
19022 }
19023 case PPC::BI__builtin_ppc_minfe: {
19024 Value *Op0 = EmitScalarExpr(E->getArg(0));
19025 Value *Op1 = EmitScalarExpr(E->getArg(1));
19026 Value *Op2 = EmitScalarExpr(E->getArg(2));
19027 Value *Op3 = EmitScalarExpr(E->getArg(3));
19028 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
19029 {Op0, Op1, Op2, Op3});
19030 }
19031 case PPC::BI__builtin_ppc_minfl: {
19032 Value *Op0 = EmitScalarExpr(E->getArg(0));
19033 Value *Op1 = EmitScalarExpr(E->getArg(1));
19034 Value *Op2 = EmitScalarExpr(E->getArg(2));
19035 Value *Op3 = EmitScalarExpr(E->getArg(3));
19036 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
19037 {Op0, Op1, Op2, Op3});
19038 }
19039 case PPC::BI__builtin_ppc_minfs: {
19040 Value *Op0 = EmitScalarExpr(E->getArg(0));
19041 Value *Op1 = EmitScalarExpr(E->getArg(1));
19042 Value *Op2 = EmitScalarExpr(E->getArg(2));
19043 Value *Op3 = EmitScalarExpr(E->getArg(3));
19044 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
19045 {Op0, Op1, Op2, Op3});
19046 }
19047 case PPC::BI__builtin_ppc_swdiv:
19048 case PPC::BI__builtin_ppc_swdivs: {
19049 Value *Op0 = EmitScalarExpr(E->getArg(0));
19050 Value *Op1 = EmitScalarExpr(E->getArg(1));
19051 return Builder.CreateFDiv(Op0, Op1, "swdiv");
19052 }
19053 case PPC::BI__builtin_ppc_set_fpscr_rn:
19054 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
19055 {EmitScalarExpr(E->getArg(0))});
19056 case PPC::BI__builtin_ppc_mffs:
19057 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
19058 }
19059}
19060
19061namespace {
19062 // If \p E is not a null pointer, insert an address space cast to match the
19063 // return type of \p E if necessary.
19064Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
19065 const CallExpr *E = nullptr) {
19066 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
19067 auto *Call = CGF.Builder.CreateCall(F);
19068 Call->addRetAttr(
19069 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
19070 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
19071 if (!E)
19072 return Call;
19073 QualType BuiltinRetType = E->getType();
19074 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
19075 if (RetTy == Call->getType())
19076 return Call;
19077 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
19078}
19079
19080Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
19081 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
19082 auto *Call = CGF.Builder.CreateCall(F);
19083 Call->addRetAttr(
19084 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
19085 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
19086 return Call;
19087}
19088
19089// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19090/// Emit code based on Code Object ABI version.
19091/// COV_4 : Emit code to use dispatch ptr
19092/// COV_5+ : Emit code to use implicitarg ptr
19093 /// COV_NONE : Emit code to load the global variable "__oclc_ABI_version"
19094 /// and use its value to choose between the COV_4 and COV_5+ approaches.
19095 /// It is used for compiling device libraries in an ABI-agnostic way.
19096///
19097 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
19098/// clang during compilation of user code.
19099Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
19100 llvm::LoadInst *LD;
19101
19102 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
19103
19104 if (Cov == CodeObjectVersionKind::COV_None) {
19105 StringRef Name = "__oclc_ABI_version";
19106 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
19107 if (!ABIVersionC)
19108 ABIVersionC = new llvm::GlobalVariable(
19109 CGF.CGM.getModule(), CGF.Int32Ty, false,
19110 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
19111 llvm::GlobalVariable::NotThreadLocal,
19112 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
19113
19114 // This load will be eliminated by the IPSCCP because it is constant
19115 // weak_odr without externally_initialized. Either changing it to weak or
19116 // adding externally_initialized will keep the load.
19117 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
19118 CGF.CGM.getIntAlign());
19119
19120 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
19121 ABIVersion,
19122 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
19123
19124 // Indexing the implicit kernarg segment.
19125 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
19126 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19127
19128 // Indexing the HSA kernel_dispatch_packet struct.
19129 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
19130 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19131
19132 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
19133 LD = CGF.Builder.CreateLoad(
19134 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19135 } else {
19136 Value *GEP = nullptr;
19137 if (Cov >= CodeObjectVersionKind::COV_5) {
19138 // Indexing the implicit kernarg segment.
19139 GEP = CGF.Builder.CreateConstGEP1_32(
19140 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19141 } else {
19142 // Indexing the HSA kernel_dispatch_packet struct.
19143 GEP = CGF.Builder.CreateConstGEP1_32(
19144 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19145 }
19146 LD = CGF.Builder.CreateLoad(
19147 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19148 }
19149
19150 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
19151 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
19152 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19153 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
19154 LD->setMetadata(llvm::LLVMContext::MD_noundef,
19155 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19156 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19157 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19158 return LD;
19159}
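// Worked example (illustrative): with Index == 1 (the y dimension) and a fixed
// code object version of COV_5 or newer, the code above loads an i16 from byte
// offset 12 + 1 * 2 == 14 of the implicit kernarg segment; under COV_4 it
// loads from offset 4 + 1 * 2 == 6 of the HSA kernel_dispatch_packet. In both
// cases the !range metadata bounds the result to [1, MaxOpenCLWorkGroupSize]
// and the load is marked invariant and noundef.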
19160
19161// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19162Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
19163 const unsigned XOffset = 12;
19164 auto *DP = EmitAMDGPUDispatchPtr(CGF);
19165 // Indexing the HSA kernel_dispatch_packet struct.
19166 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
19167 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
19168 auto *LD = CGF.Builder.CreateLoad(
19169 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
19170
19171 llvm::MDBuilder MDB(CGF.getLLVMContext());
19172
19173 // Known non-zero.
19174 LD->setMetadata(llvm::LLVMContext::MD_range,
19175 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
19176 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19177 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19178 return LD;
19179}
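// Illustrative note: the grid size fields sit at byte offsets 12, 16, and 20
// of the HSA kernel_dispatch_packet (XOffset + Index * 4); the i32 load above
// is tagged invariant and given a wrapping !range of [1, 0) to mark it as
// known non-zero.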
19180} // namespace
19181
19182// For processing memory ordering and memory scope arguments of various
19183// amdgcn builtins.
19184// \p Order takes a C++11 comptabile memory-ordering specifier and converts
19185// it into LLVM's memory ordering specifier using atomic C ABI, and writes
19186// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
19187// specific SyncScopeID and writes it to \p SSID.
19188 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
19189 llvm::AtomicOrdering &AO,
19190 llvm::SyncScope::ID &SSID) {
19191 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
19192
19193 // Map C11/C++11 memory ordering to LLVM memory ordering
19194 assert(llvm::isValidAtomicOrderingCABI(ord));
19195 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
19196 case llvm::AtomicOrderingCABI::acquire:
19197 case llvm::AtomicOrderingCABI::consume:
19198 AO = llvm::AtomicOrdering::Acquire;
19199 break;
19200 case llvm::AtomicOrderingCABI::release:
19201 AO = llvm::AtomicOrdering::Release;
19202 break;
19203 case llvm::AtomicOrderingCABI::acq_rel:
19204 AO = llvm::AtomicOrdering::AcquireRelease;
19205 break;
19206 case llvm::AtomicOrderingCABI::seq_cst:
19207 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19208 break;
19209 case llvm::AtomicOrderingCABI::relaxed:
19210 AO = llvm::AtomicOrdering::Monotonic;
19211 break;
19212 }
19213
19214 // Some of the atomic builtins take the scope as a string name.
19215 StringRef scp;
19216 if (llvm::getConstantStringInfo(Scope, scp)) {
19217 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19218 return;
19219 }
19220
19221 // Older builtins had an enum argument for the memory scope.
19222 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19223 switch (scope) {
19224 case 0: // __MEMORY_SCOPE_SYSTEM
19225 SSID = llvm::SyncScope::System;
19226 break;
19227 case 1: // __MEMORY_SCOPE_DEVICE
19228 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19229 break;
19230 case 2: // __MEMORY_SCOPE_WRKGRP
19231 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19232 break;
19233 case 3: // __MEMORY_SCOPE_WVFRNT
19234 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19235 break;
19236 case 4: // __MEMORY_SCOPE_SINGLE
19237 SSID = llvm::SyncScope::SingleThread;
19238 break;
19239 default:
19240 SSID = llvm::SyncScope::System;
19241 break;
19242 }
19243}
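// Example (illustrative, not from the original file): a source-level call such
// as __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup") is expected to
// reach this helper with Order == 3 (release in the atomic C ABI) and Scope
// holding the string "workgroup", producing AO == AtomicOrdering::Release and
// SSID == getOrInsertSyncScopeID("workgroup").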
19244
19245llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19246 unsigned Idx,
19247 const CallExpr *E) {
19248 llvm::Value *Arg = nullptr;
19249 if ((ICEArguments & (1 << Idx)) == 0) {
19250 Arg = EmitScalarExpr(E->getArg(Idx));
19251 } else {
19252 // If this is required to be a constant, constant fold it so that we
19253 // know that the generated intrinsic gets a ConstantInt.
19254 std::optional<llvm::APSInt> Result =
19255 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19256 assert(Result && "Expected argument to be a constant");
19257 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19258 }
19259 return Arg;
19260}
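// Usage sketch (assumption about a typical call site): the ICEArguments mask
// comes from ASTContext::GetBuiltinType, so arguments the builtin declares as
// integer-constant expressions are folded to a ConstantInt, e.g.
//   llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);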
19261
19262// Return dot product intrinsic that corresponds to the QT scalar type
19263static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19264 if (QT->isFloatingType())
19265 return RT.getFDotIntrinsic();
19266 if (QT->isSignedIntegerType())
19267 return RT.getSDotIntrinsic();
19268 assert(QT->isUnsignedIntegerType());
19269 return RT.getUDotIntrinsic();
19270}
19271
19272static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19273 if (QT->hasSignedIntegerRepresentation()) {
19274 return RT.getFirstBitSHighIntrinsic();
19275 }
19276
19277 assert(QT->hasUnsignedIntegerRepresentation());
19278 return RT.getFirstBitUHighIntrinsic();
19279}
19280
19281// Return wave active sum that corresponds to the QT scalar type
19282static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
19283 CGHLSLRuntime &RT, QualType QT) {
19284 switch (Arch) {
19285 case llvm::Triple::spirv:
19286 return llvm::Intrinsic::spv_wave_reduce_sum;
19287 case llvm::Triple::dxil: {
19288 if (QT->isUnsignedIntegerType())
19289 return llvm::Intrinsic::dx_wave_reduce_usum;
19290 return llvm::Intrinsic::dx_wave_reduce_sum;
19291 }
19292 default:
19293 llvm_unreachable("Intrinsic WaveActiveSum"
19294 " not supported by target architecture");
19295 }
19296}
19297
19298 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19299 const CallExpr *E,
19300 ReturnValueSlot ReturnValue) {
19301 if (!getLangOpts().HLSL)
19302 return nullptr;
19303
19304 switch (BuiltinID) {
19305 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19306 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19307 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19308
19309 // TODO: Map to an hlsl_device address space.
19310 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19311
19312 return Builder.CreateIntrinsic(
19313 RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
19314 ArrayRef<Value *>{HandleOp, IndexOp});
19315 }
19316 case Builtin::BI__builtin_hlsl_all: {
19317 Value *Op0 = EmitScalarExpr(E->getArg(0));
19318 return Builder.CreateIntrinsic(
19319 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19320 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19321 "hlsl.all");
19322 }
19323 case Builtin::BI__builtin_hlsl_any: {
19324 Value *Op0 = EmitScalarExpr(E->getArg(0));
19325 return Builder.CreateIntrinsic(
19326 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19327 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19328 "hlsl.any");
19329 }
19330 case Builtin::BI__builtin_hlsl_asdouble:
19331 return handleAsDoubleBuiltin(*this, E);
19332 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19333 Value *OpX = EmitScalarExpr(E->getArg(0));
19334 Value *OpMin = EmitScalarExpr(E->getArg(1));
19335 Value *OpMax = EmitScalarExpr(E->getArg(2));
19336
19337 QualType Ty = E->getArg(0)->getType();
19338 if (auto *VecTy = Ty->getAs<VectorType>())
19339 Ty = VecTy->getElementType();
19340
19341 Intrinsic::ID Intr;
19342 if (Ty->isFloatingType()) {
19343 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19344 } else if (Ty->isUnsignedIntegerType()) {
19345 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19346 } else {
19347 assert(Ty->isSignedIntegerType());
19348 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19349 }
19350 return Builder.CreateIntrinsic(
19351 /*ReturnType=*/OpX->getType(), Intr,
19352 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19353 }
19354 case Builtin::BI__builtin_hlsl_cross: {
19355 Value *Op0 = EmitScalarExpr(E->getArg(0));
19356 Value *Op1 = EmitScalarExpr(E->getArg(1));
19357 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19358 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19359 "cross operands must have a float representation");
19360 // make sure each vector has exactly 3 elements
19361 assert(
19362 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19363 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19364 "input vectors must have 3 elements each");
19365 return Builder.CreateIntrinsic(
19366 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19367 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19368 }
19369 case Builtin::BI__builtin_hlsl_dot: {
19370 Value *Op0 = EmitScalarExpr(E->getArg(0));
19371 Value *Op1 = EmitScalarExpr(E->getArg(1));
19372 llvm::Type *T0 = Op0->getType();
19373 llvm::Type *T1 = Op1->getType();
19374
19375 // If the arguments are scalars, just emit a multiply
19376 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19377 if (T0->isFloatingPointTy())
19378 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19379
19380 if (T0->isIntegerTy())
19381 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19382
19383 llvm_unreachable(
19384 "Scalar dot product is only supported on ints and floats.");
19385 }
19386 // For vectors, validate types and emit the appropriate intrinsic
19387
19388 // A VectorSplat should have happened
19389 assert(T0->isVectorTy() && T1->isVectorTy() &&
19390 "Dot product of vector and scalar is not supported.");
19391
19392 auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
19393 [[maybe_unused]] auto *VecTy1 =
19394 E->getArg(1)->getType()->castAs<VectorType>();
19395
19396 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19397 "Dot product of vectors need the same element types.");
19398
19399 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19400 "Dot product requires vectors to be of the same size.");
19401
19402 return Builder.CreateIntrinsic(
19403 /*ReturnType=*/T0->getScalarType(),
19404 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19405 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19406 }
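// Illustrative example: dot(float3 a, float3 b) takes the vector path above
// and becomes a call to the floating-point dot intrinsic returning a scalar
// float, while dot(int a, int b) takes the scalar path and is emitted as a
// plain integer multiply named "hlsl.dot".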
19407 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19408 Value *A = EmitScalarExpr(E->getArg(0));
19409 Value *B = EmitScalarExpr(E->getArg(1));
19410 Value *C = EmitScalarExpr(E->getArg(2));
19411
19412 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19413 return Builder.CreateIntrinsic(
19414 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19415 "hlsl.dot4add.i8packed");
19416 }
19417 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19418 Value *A = EmitScalarExpr(E->getArg(0));
19419 Value *B = EmitScalarExpr(E->getArg(1));
19420 Value *C = EmitScalarExpr(E->getArg(2));
19421
19422 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19423 return Builder.CreateIntrinsic(
19424 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19425 "hlsl.dot4add.u8packed");
19426 }
19427 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19428 Value *X = EmitScalarExpr(E->getArg(0));
19429
19430 return Builder.CreateIntrinsic(
19431 /*ReturnType=*/ConvertType(E->getType()),
19432 getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19433 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19434 }
19435 case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19436 Value *X = EmitScalarExpr(E->getArg(0));
19437
19438 return Builder.CreateIntrinsic(
19439 /*ReturnType=*/ConvertType(E->getType()),
19440 CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
19441 nullptr, "hlsl.firstbitlow");
19442 }
19443 case Builtin::BI__builtin_hlsl_lerp: {
19444 Value *X = EmitScalarExpr(E->getArg(0));
19445 Value *Y = EmitScalarExpr(E->getArg(1));
19446 Value *S = EmitScalarExpr(E->getArg(2));
19447 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19448 llvm_unreachable("lerp operand must have a float representation");
19449 return Builder.CreateIntrinsic(
19450 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19451 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19452 }
19453 case Builtin::BI__builtin_hlsl_normalize: {
19454 Value *X = EmitScalarExpr(E->getArg(0));
19455
19456 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19457 "normalize operand must have a float representation");
19458
19459 return Builder.CreateIntrinsic(
19460 /*ReturnType=*/X->getType(),
19461 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19462 nullptr, "hlsl.normalize");
19463 }
19464 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19465 Value *X = EmitScalarExpr(E->getArg(0));
19466
19467 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19468 "degree operand must have a float representation");
19469
19470 return Builder.CreateIntrinsic(
19471 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19472 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19473 }
19474 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19475 Value *Op0 = EmitScalarExpr(E->getArg(0));
19476 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19477 llvm_unreachable("frac operand must have a float representation");
19478 return Builder.CreateIntrinsic(
19479 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19480 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19481}
19482case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19483 Value *Op0 = EmitScalarExpr(E->getArg(0));
19484 llvm::Type *Xty = Op0->getType();
19485 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19486 if (Xty->isVectorTy()) {
19487 auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
19488 retType = llvm::VectorType::get(
19489 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19490 }
19491 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19492 llvm_unreachable("isinf operand must have a float representation");
19493 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19494 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19495 }
19496 case Builtin::BI__builtin_hlsl_mad: {
19497 Value *M = EmitScalarExpr(E->getArg(0));
19498 Value *A = EmitScalarExpr(E->getArg(1));
19499 Value *B = EmitScalarExpr(E->getArg(2));
19500 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19501 return Builder.CreateIntrinsic(
19502 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19503 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19504
19505 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19506 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19507 return Builder.CreateIntrinsic(
19508 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19509 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19510
19511 Value *Mul = Builder.CreateNSWMul(M, A);
19512 return Builder.CreateNSWAdd(Mul, B);
19513 }
19514 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19515 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19516 return Builder.CreateIntrinsic(
19517 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19518 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19519
19520 Value *Mul = Builder.CreateNUWMul(M, A);
19521 return Builder.CreateNUWAdd(Mul, B);
19522 }
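// Illustrative example: for unsigned integer operands on a non-DXIL target,
// mad(m, a, b) lowers to a nuw multiply followed by a nuw add; on DXIL it
// becomes a dx.umad call, and floating-point operands always use the
// llvm.fmuladd intrinsic.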
19523 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19524 Value *Op0 = EmitScalarExpr(E->getArg(0));
19525 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19526 llvm_unreachable("rcp operand must have a float representation");
19527 llvm::Type *Ty = Op0->getType();
19528 llvm::Type *EltTy = Ty->getScalarType();
19529 Constant *One = Ty->isVectorTy()
19530 ? ConstantVector::getSplat(
19531 ElementCount::getFixed(
19532 cast<FixedVectorType>(Ty)->getNumElements()),
19533 ConstantFP::get(EltTy, 1.0))
19534 : ConstantFP::get(EltTy, 1.0);
19535 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19536 }
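// Illustrative example: rcp(float4 v) is emitted as an fdiv of a splatted 1.0
// constant by v, named "hlsl.rcp"; scalar inputs divide by a plain 1.0.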
19537 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19538 Value *Op0 = EmitScalarExpr(E->getArg(0));
19539 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19540 llvm_unreachable("rsqrt operand must have a float representation");
19541 return Builder.CreateIntrinsic(
19542 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19543 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19544 }
19545 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19546 Value *Op0 = EmitScalarExpr(E->getArg(0));
19547 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19548 "saturate operand must have a float representation");
19549 return Builder.CreateIntrinsic(
19550 /*ReturnType=*/Op0->getType(),
19551 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19552 nullptr, "hlsl.saturate");
19553 }
19554 case Builtin::BI__builtin_hlsl_select: {
19555 Value *OpCond = EmitScalarExpr(E->getArg(0));
19556 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19557 Value *OpTrue =
19558 RValTrue.isScalar()
19559 ? RValTrue.getScalarVal()
19560 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19561 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19562 Value *OpFalse =
19563 RValFalse.isScalar()
19564 ? RValFalse.getScalarVal()
19565 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19566
19567 Value *SelectVal =
19568 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19569 if (!RValTrue.isScalar())
19570 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19571 ReturnValue.isVolatile());
19572
19573 return SelectVal;
19574 }
19575 case Builtin::BI__builtin_hlsl_step: {
19576 Value *Op0 = EmitScalarExpr(E->getArg(0));
19577 Value *Op1 = EmitScalarExpr(E->getArg(1));
19578 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19579 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19580 "step operands must have a float representation");
19581 return Builder.CreateIntrinsic(
19582 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19583 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19584 }
19585 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19586 Value *Op = EmitScalarExpr(E->getArg(0));
19587 assert(Op->getType()->isIntegerTy(1) &&
19588 "Intrinsic WaveActiveAllTrue operand must be a bool");
19589
19590 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19591 return EmitRuntimeCall(
19592 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19593 }
19594 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19595 Value *Op = EmitScalarExpr(E->getArg(0));
19596 assert(Op->getType()->isIntegerTy(1) &&
19597 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19598
19599 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19600 return EmitRuntimeCall(
19601 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19602 }
19603 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19604 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19605 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19606 return EmitRuntimeCall(
19607 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19608 ArrayRef{OpExpr});
19609 }
19610 case Builtin::BI__builtin_hlsl_wave_active_sum: {
19611 // Due to the use of variadic arguments, explicitly retrieve the argument
19612 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19613 llvm::FunctionType *FT = llvm::FunctionType::get(
19614 OpExpr->getType(), ArrayRef{OpExpr->getType()}, false);
19615 Intrinsic::ID IID = getWaveActiveSumIntrinsic(
19616 getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
19617 E->getArg(0)->getType());
19618
19619 // Get overloaded name
19620 std::string Name =
19621 Intrinsic::getName(IID, ArrayRef{OpExpr->getType()}, &CGM.getModule());
19622 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19623 /*Local=*/false,
19624 /*AssumeConvergent=*/true),
19625 ArrayRef{OpExpr}, "hlsl.wave.active.sum");
19626 }
19627 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19628 // We don't define a SPIR-V intrinsic; instead this is a SPIR-V built-in
19629 // defined in SPIRVBuiltins.td. So we manually get the matching name
19630 // for the DirectX intrinsic and the demangled builtin name.
19631 switch (CGM.getTarget().getTriple().getArch()) {
19632 case llvm::Triple::dxil:
19633 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19634 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19635 case llvm::Triple::spirv:
19636 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19637 llvm::FunctionType::get(IntTy, {}, false),
19638 "__hlsl_wave_get_lane_index", {}, false, true));
19639 default:
19640 llvm_unreachable(
19641 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19642 }
19643 }
19644 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19645 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19646 return EmitRuntimeCall(
19647 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19648 }
19649 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19650 // Due to the use of variadic arguments we must explicitly retrieve them and
19651 // create our function type.
19652 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19653 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19654 llvm::FunctionType *FT = llvm::FunctionType::get(
19655 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19656 false);
19657
19658 // Get overloaded name
19659 std::string Name =
19660 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19661 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19662 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19663 /*Local=*/false,
19664 /*AssumeConvergent=*/true),
19665 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19666 }
19667 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19668 auto *Arg0 = E->getArg(0);
19669 Value *Op0 = EmitScalarExpr(Arg0);
19670 llvm::Type *Xty = Op0->getType();
19671 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19672 if (Xty->isVectorTy()) {
19673 auto *XVecTy = Arg0->getType()->castAs<VectorType>();
19674 retType = llvm::VectorType::get(
19675 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19676 }
19677 assert((Arg0->getType()->hasFloatingRepresentation() ||
19678 Arg0->getType()->hasIntegerRepresentation()) &&
19679 "sign operand must have a float or int representation");
19680
19681 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19682 Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19683 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19684 ConstantInt::get(retType, 1), "hlsl.sign");
19685 }
19686
19687 return Builder.CreateIntrinsic(
19688 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19689 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19690 }
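// Illustrative example: for an unsigned scalar x, sign(x) above becomes
// select(x == 0, 0, 1); signed integer and floating-point inputs instead call
// the target's sign intrinsic with an i32 (or i32 vector) result type.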
19691 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19692 Value *Op0 = EmitScalarExpr(E->getArg(0));
19693 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19694 "radians operand must have a float representation");
19695 return Builder.CreateIntrinsic(
19696 /*ReturnType=*/Op0->getType(),
19697 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19698 nullptr, "hlsl.radians");
19699 }
19700 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19701 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19702 Value *Offset = EmitScalarExpr(E->getArg(1));
19703 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19704 return Builder.CreateIntrinsic(
19705 /*ReturnType=*/Offset->getType(),
19706 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19707 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19708 }
19709 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19710
19711 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19712 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19713 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19714 "asuint operands types mismatch");
19715 return handleHlslSplitdouble(E, this);
19716 }
19717 case Builtin::BI__builtin_hlsl_elementwise_clip:
19718 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19719 "clip operands types mismatch");
19720 return handleHlslClip(E, this);
19721 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19722 Intrinsic::ID ID =
19723 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19724 return EmitRuntimeCall(
19725 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19726 }
19727 }
19728 return nullptr;
19729}
19730
19731void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19732 const CallExpr *E) {
19733 constexpr const char *Tag = "amdgpu-as";
19734
19735 LLVMContext &Ctx = Inst->getContext();
19736 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19737 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19738 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19739 StringRef AS;
19740 if (llvm::getConstantStringInfo(V, AS)) {
19741 MMRAs.push_back({Tag, AS});
19742 // TODO: Delete the resulting unused constant?
19743 continue;
19744 }
19745 CGM.Error(E->getExprLoc(),
19746 "expected an address space name as a string literal");
19747 }
19748
19749 llvm::sort(MMRAs);
19750 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19751 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19752}
19753
19754 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19755 const CallExpr *E) {
19756 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19757 llvm::SyncScope::ID SSID;
19758 switch (BuiltinID) {
19759 case AMDGPU::BI__builtin_amdgcn_div_scale:
19760 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19761 // Translate from the intrinsic's struct return to the builtin's out
19762 // argument.
19763
19764 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19765
19766 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19767 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19768 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19769
19770 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19771 X->getType());
19772
19773 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19774
19775 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19776 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19777
19778 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19779
19780 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19781 Builder.CreateStore(FlagExt, FlagOutPtr);
19782 return Result;
19783 }
19784 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19785 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19786 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19787 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19788 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19789 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19790
19791 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19792 Src0->getType());
19793 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19794 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19795 }
19796
19797 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19798 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19799 Intrinsic::amdgcn_ds_swizzle);
19800 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19801 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19802 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19803 llvm::SmallVector<llvm::Value *, 6> Args;
19804 // Find out if any arguments are required to be integer constant
19805 // expressions.
19806 unsigned ICEArguments = 0;
19807 ASTContext::GetBuiltinTypeError Error;
19808 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19809 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19810 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19811 unsigned Size = DataTy->getPrimitiveSizeInBits();
19812 llvm::Type *IntTy =
19813 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19814 Function *F =
19815 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19816 ? Intrinsic::amdgcn_mov_dpp8
19817 : Intrinsic::amdgcn_update_dpp,
19818 IntTy);
19819 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19820 E->getNumArgs() == 2);
19821 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19822 if (InsertOld)
19823 Args.push_back(llvm::PoisonValue::get(IntTy));
19824 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19825 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19826 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19827 Size < 32) {
19828 if (!DataTy->isIntegerTy())
19829 V = Builder.CreateBitCast(
19830 V, llvm::IntegerType::get(Builder.getContext(), Size));
19831 V = Builder.CreateZExtOrBitCast(V, IntTy);
19832 }
19833 llvm::Type *ExpTy =
19834 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19835 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19836 }
19837 Value *V = Builder.CreateCall(F, Args);
19838 if (Size < 32 && !DataTy->isIntegerTy())
19839 V = Builder.CreateTrunc(
19840 V, llvm::IntegerType::get(Builder.getContext(), Size));
19841 return Builder.CreateTruncOrBitCast(V, DataTy);
19842 }
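// Illustrative note: for a sub-32-bit payload such as half, the code above
// bitcasts to the matching integer width, zero-extends to i32, calls the dpp
// intrinsic on i32, then truncates and bitcasts the result back to the
// original type.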
19843 case AMDGPU::BI__builtin_amdgcn_permlane16:
19844 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19845 return emitBuiltinWithOneOverloadedType<6>(
19846 *this, E,
19847 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19848 ? Intrinsic::amdgcn_permlane16
19849 : Intrinsic::amdgcn_permlanex16);
19850 case AMDGPU::BI__builtin_amdgcn_permlane64:
19851 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19852 Intrinsic::amdgcn_permlane64);
19853 case AMDGPU::BI__builtin_amdgcn_readlane:
19854 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19855 Intrinsic::amdgcn_readlane);
19856 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19857 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19858 Intrinsic::amdgcn_readfirstlane);
19859 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19860 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19861 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19862 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19863 Intrinsic::amdgcn_div_fixup);
19864 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19865 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19866 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19867 case AMDGPU::BI__builtin_amdgcn_rcp:
19868 case AMDGPU::BI__builtin_amdgcn_rcpf:
19869 case AMDGPU::BI__builtin_amdgcn_rcph:
19870 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19871 case AMDGPU::BI__builtin_amdgcn_sqrt:
19872 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19873 case AMDGPU::BI__builtin_amdgcn_sqrth:
19874 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19875 Intrinsic::amdgcn_sqrt);
19876 case AMDGPU::BI__builtin_amdgcn_rsq:
19877 case AMDGPU::BI__builtin_amdgcn_rsqf:
19878 case AMDGPU::BI__builtin_amdgcn_rsqh:
19879 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19880 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19881 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19882 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19883 Intrinsic::amdgcn_rsq_clamp);
19884 case AMDGPU::BI__builtin_amdgcn_sinf:
19885 case AMDGPU::BI__builtin_amdgcn_sinh:
19886 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19887 case AMDGPU::BI__builtin_amdgcn_cosf:
19888 case AMDGPU::BI__builtin_amdgcn_cosh:
19889 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19890 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19891 return EmitAMDGPUDispatchPtr(*this, E);
19892 case AMDGPU::BI__builtin_amdgcn_logf:
19893 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19894 case AMDGPU::BI__builtin_amdgcn_exp2f:
19895 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19896 Intrinsic::amdgcn_exp2);
19897 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19898 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19899 Intrinsic::amdgcn_log_clamp);
19900 case AMDGPU::BI__builtin_amdgcn_ldexp:
19901 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19902 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19903 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19904 llvm::Function *F =
19905 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19906 return Builder.CreateCall(F, {Src0, Src1});
19907 }
19908 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19909 // The raw instruction has a different behavior for out of bounds exponent
19910 // values (implicit truncation instead of saturating to short_min/short_max).
19911 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19912 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19913 llvm::Function *F =
19914 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19915 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19916 }
19917 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19918 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19919 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19920 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19921 Intrinsic::amdgcn_frexp_mant);
19922 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19923 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19924 Value *Src0 = EmitScalarExpr(E->getArg(0));
19925 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19926 { Builder.getInt32Ty(), Src0->getType() });
19927 return Builder.CreateCall(F, Src0);
19928 }
19929 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19930 Value *Src0 = EmitScalarExpr(E->getArg(0));
19931 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19932 { Builder.getInt16Ty(), Src0->getType() });
19933 return Builder.CreateCall(F, Src0);
19934 }
19935 case AMDGPU::BI__builtin_amdgcn_fract:
19936 case AMDGPU::BI__builtin_amdgcn_fractf:
19937 case AMDGPU::BI__builtin_amdgcn_fracth:
19938 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19939 Intrinsic::amdgcn_fract);
19940 case AMDGPU::BI__builtin_amdgcn_lerp:
19941 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19942 Intrinsic::amdgcn_lerp);
19943 case AMDGPU::BI__builtin_amdgcn_ubfe:
19944 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19945 Intrinsic::amdgcn_ubfe);
19946 case AMDGPU::BI__builtin_amdgcn_sbfe:
19947 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19948 Intrinsic::amdgcn_sbfe);
19949 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19950 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19951 llvm::Type *ResultType = ConvertType(E->getType());
19952 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19953 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19954 return Builder.CreateCall(F, { Src });
19955 }
19956 case AMDGPU::BI__builtin_amdgcn_uicmp:
19957 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19958 case AMDGPU::BI__builtin_amdgcn_sicmp:
19959 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19960 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19961 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19962 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19963
19964 // FIXME-GFX10: How should 32 bit mask be handled?
19965 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19966 { Builder.getInt64Ty(), Src0->getType() });
19967 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19968 }
19969 case AMDGPU::BI__builtin_amdgcn_fcmp:
19970 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19971 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19972 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19973 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19974
19975 // FIXME-GFX10: How should 32 bit mask be handled?
19976 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19977 { Builder.getInt64Ty(), Src0->getType() });
19978 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19979 }
19980 case AMDGPU::BI__builtin_amdgcn_class:
19981 case AMDGPU::BI__builtin_amdgcn_classf:
19982 case AMDGPU::BI__builtin_amdgcn_classh:
19983 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19984 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19985 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19986 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19987 Intrinsic::amdgcn_fmed3);
19988 case AMDGPU::BI__builtin_amdgcn_ds_append:
19989 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19990 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19991 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19992 Value *Src0 = EmitScalarExpr(E->getArg(0));
19993 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19994 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19995 }
19996 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19997 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19998 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19999 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
20000 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
20001 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
20002 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
20003 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
20004 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
20005 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
20006 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
20007 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
20008 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
20009 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
20010 Intrinsic::ID IID;
20011 switch (BuiltinID) {
20012 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
20013 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
20014 IID = Intrinsic::amdgcn_global_load_tr_b64;
20015 break;
20016 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
20017 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
20018 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
20019 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
20020 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
20021 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
20022 IID = Intrinsic::amdgcn_global_load_tr_b128;
20023 break;
20024 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
20025 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
20026 break;
20027 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
20028 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
20029 break;
20030 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
20031 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
20032 break;
20033 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
20034 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
20035 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
20036 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
20037 break;
20038 }
20039 llvm::Type *LoadTy = ConvertType(E->getType());
20040 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
20041 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
20042 return Builder.CreateCall(F, {Addr});
20043 }
20044 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
20045 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
20046 {llvm::Type::getInt64Ty(getLLVMContext())});
20047 return Builder.CreateCall(F);
20048 }
20049 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
20050 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
20051 {llvm::Type::getInt64Ty(getLLVMContext())});
20052 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
20053 return Builder.CreateCall(F, {Env});
20054 }
20055 case AMDGPU::BI__builtin_amdgcn_read_exec:
20056 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
20057 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
20058 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
20059 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
20060 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
20061 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
20062 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
20063 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
20064 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
20065 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
20066 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
20067 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
20068 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
20069 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
20070 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
20071
20072 // The builtins take these arguments as vec4 where the last element is
20073 // ignored. The intrinsic takes them as vec3.
20074 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
20075 ArrayRef<int>{0, 1, 2});
20076 RayDir =
20077 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
20078 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
20079 ArrayRef<int>{0, 1, 2});
20080
20081 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
20082 {NodePtr->getType(), RayDir->getType()});
20083 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
20084 RayInverseDir, TextureDescr});
20085 }
20086
20087 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
20088 SmallVector<Value *, 4> Args;
20089 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20090 Args.push_back(EmitScalarExpr(E->getArg(i)));
20091
20092 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
20093 Value *Call = Builder.CreateCall(F, Args);
20094 Value *Rtn = Builder.CreateExtractValue(Call, 0);
20095 Value *A = Builder.CreateExtractValue(Call, 1);
20096 llvm::Type *RetTy = ConvertType(E->getType());
20097 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
20098 (uint64_t)0);
20099 return Builder.CreateInsertElement(I0, A, 1);
20100 }
20101 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
20102 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
20103 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
20104 Function *F = CGM.getIntrinsic(
20105 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
20106 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
20107 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
20108 {VT, VT});
20109
20110 SmallVector<Value *, 9> Args;
20111 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
20112 Args.push_back(EmitScalarExpr(E->getArg(I)));
20113 return Builder.CreateCall(F, Args);
20114 }
20115 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20116 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20117 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20118 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20119 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20120 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20121 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20122 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20123 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20124 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20125 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20126 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20127 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20128 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20129 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20130 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20131 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20132 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20133 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20134 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20135 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20136 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20137 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20138 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20139 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20140 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20141 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20142 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20143 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20144 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20145 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20146 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20147 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20148 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20149 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20150 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20151 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20152 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20153 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20154 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20155 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20156 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20157 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20158 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20159 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20160 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20161 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20162 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20163 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20164 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20165 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20166 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20167 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20168 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20169 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20170 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20171 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20172 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20173 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20174 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
20175
20176 // These operations perform a matrix multiplication and accumulation of
20177 // the form:
20178 // D = A * B + C
20179 // We need to specify one type for matrices AB and one for matrices CD.
20180 // Sparse matrix operations can have different types for A and B as well as
20181 // an additional type for the sparsity index.
20182 // The destination type should be put before the types used for source operands.
20183 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
20184 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20185 // There is no need for the variable opsel argument, so always set it to
20186 // "false".
20187 bool AppendFalseForOpselArg = false;
20188 unsigned BuiltinWMMAOp;
20189
20190 switch (BuiltinID) {
20191 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20192 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20193 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20194 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20195 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20196 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20197 break;
20198 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20199 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20200 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20201 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20202 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20203 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20204 break;
20205 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20206 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20207 AppendFalseForOpselArg = true;
20208 [[fallthrough]];
20209 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20210 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20211 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20212 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20213 break;
20214 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20215 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20216 AppendFalseForOpselArg = true;
20217 [[fallthrough]];
20218 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20219 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20220 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20221 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20222 break;
20223 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20224 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20225 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20226 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20227 break;
20228 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20229 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20230 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20231 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20232 break;
20233 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20234 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20235 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20236 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20237 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20238 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20239 break;
20240 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20241 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20242 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20243 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20244 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20245 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20246 break;
20247 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20248 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20249 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20250 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20251 break;
20252 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20253 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20254 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20255 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20256 break;
20257 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20258 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20259 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20260 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20261 break;
20262 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20263 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20264 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20265 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20266 break;
20267 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20268 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20269 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20270 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20271 break;
20272 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20273 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20274 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20275 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20276 break;
20277 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20278 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20279 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20280 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20281 break;
20282 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20283 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20284 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20285 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20286 break;
20287 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20288 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20289 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20290 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20291 break;
20292 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20293 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20294 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20295 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20296 break;
20297 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20298 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20299 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20300 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20301 break;
20302 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20303 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20304 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20305 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20306 break;
20307 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20308 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20309 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20310 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20311 break;
20312 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20313 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20314 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20315 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20316 break;
20317 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20318 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20319 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20320 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20321 break;
20322 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20323 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20324 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20325 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20326 break;
20327 }
20328
20329 SmallVector<Value *, 6> Args;
20330 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20331 Args.push_back(EmitScalarExpr(E->getArg(i)));
20332 if (AppendFalseForOpselArg)
20333 Args.push_back(Builder.getFalse());
20334
20335 SmallVector<llvm::Type *, 6> ArgTypes;
20336 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20337 ArgTypes.push_back(Args[ArgIdx]->getType());
20338
20339 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20340 return Builder.CreateCall(F, Args);
20341 }
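// Illustrative example: __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a, b, c)
// selects the {2, 0} entry above, so the intrinsic is overloaded on the type
// of argument 2 (the C/D accumulator) and argument 0 (the A/B operands)
// before the call is emitted.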
20342
20343 // amdgcn workitem
20344 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20345 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20346 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20347 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20348 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20349 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20350
20351 // amdgcn workgroup size
20352 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20353 return EmitAMDGPUWorkGroupSize(*this, 0);
20354 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20355 return EmitAMDGPUWorkGroupSize(*this, 1);
20356 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20357 return EmitAMDGPUWorkGroupSize(*this, 2);
20358
20359 // amdgcn grid size
20360 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20361 return EmitAMDGPUGridSize(*this, 0);
20362 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20363 return EmitAMDGPUGridSize(*this, 1);
20364 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20365 return EmitAMDGPUGridSize(*this, 2);
20366
20367 // r600 intrinsics
20368 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20369 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20370 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20371 Intrinsic::r600_recipsqrt_ieee);
20372 case AMDGPU::BI__builtin_r600_read_tidig_x:
20373 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20374 case AMDGPU::BI__builtin_r600_read_tidig_y:
20375 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20376 case AMDGPU::BI__builtin_r600_read_tidig_z:
20377 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20378 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20379 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20380 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20381 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20382 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20383 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20384 }
20385 case AMDGPU::BI__builtin_amdgcn_fence: {
20386 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20387 EmitScalarExpr(E->getArg(1)), AO, SSID);
20388 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20389 if (E->getNumArgs() > 2)
20390 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20391 return Fence;
20392 }
20393 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20394 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20395 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20396 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20397 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20398 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20399 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20400 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20401 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20402 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20403 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20404 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20405 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20406 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20407 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20408 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20409 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20410 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20411 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20412 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20413 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20414 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20415 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20416 llvm::AtomicRMWInst::BinOp BinOp;
20417 switch (BuiltinID) {
20418 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20419 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20420 BinOp = llvm::AtomicRMWInst::UIncWrap;
20421 break;
20422 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20423 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20424 BinOp = llvm::AtomicRMWInst::UDecWrap;
20425 break;
20426 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20427 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20428 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20429 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20430 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20431 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20432 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20433 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20434 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20435 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20436 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20437 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20438 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20439 BinOp = llvm::AtomicRMWInst::FAdd;
20440 break;
20441 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20442 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20443 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20444 BinOp = llvm::AtomicRMWInst::FMin;
20445 break;
20446 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20447 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20448 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20449 BinOp = llvm::AtomicRMWInst::FMax;
20450 break;
20451 }
20452
20453 Address Ptr = CheckAtomicAlignment(*this, E);
20454 Value *Val = EmitScalarExpr(E->getArg(1));
20455 llvm::Type *OrigTy = Val->getType();
20456 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20457
20458 bool Volatile;
20459
20460 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20461 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20462 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20463 // __builtin_amdgcn_ds_faddf/fminf/fmaxf have an explicit volatile argument
20464 Volatile =
20465 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20466 } else {
20467 // Infer volatile from the passed type.
20468 Volatile =
20469 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
20470 }
20471
20472 if (E->getNumArgs() >= 4) {
20473 // Some of the builtins have explicit ordering and scope arguments.
20474 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20475 EmitScalarExpr(E->getArg(3)), AO, SSID);
20476 } else {
20477 // Most of the builtins do not have syncscope/order arguments. For DS
20478 // atomics the scope doesn't really matter, as they implicitly operate at
20479 // workgroup scope.
20480 //
20481 // The global/flat cases need to use agent scope to consistently produce
20482 // the native instruction instead of a cmpxchg expansion.
20483 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20484 AO = AtomicOrdering::Monotonic;
20485
20486 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20487 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20488 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20489 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20490 llvm::Type *V2BF16Ty = FixedVectorType::get(
20491 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20492 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20493 }
20494 }
20495
20496 llvm::AtomicRMWInst *RMW =
20497 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20498 if (Volatile)
20499 RMW->setVolatile(true);
20500
20501 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20502 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20503 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20504 // instruction for flat and global operations.
20505 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20506 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20507
20508 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20509 // instruction, but this only matters for float fadd.
20510 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20511 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20512 }
20513
20514 return Builder.CreateBitCast(RMW, OrigTy);
20515 }
20516 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20517 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20518 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20519 llvm::Type *ResultType = ConvertType(E->getType());
20520 // s_sendmsg_rtn is mangled using return type only.
20521 Function *F =
20522 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20523 return Builder.CreateCall(F, {Arg});
20524 }
20525 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20526 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20527 // Because builtin types are limited, and the intrinsic uses a struct/pair
20528 // output, marshal the pair-of-i32 to <2 x i32>.
20529 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20530 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20531 Value *FI = EmitScalarExpr(E->getArg(2));
20532 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20533 Function *F =
20534 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20535 ? Intrinsic::amdgcn_permlane16_swap
20536 : Intrinsic::amdgcn_permlane32_swap);
20537 llvm::CallInst *Call =
20538 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20539
20540 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20541 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20542
20543 llvm::Type *ResultType = ConvertType(E->getType());
20544
20545 llvm::Value *Insert0 = Builder.CreateInsertElement(
20546 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20547 llvm::Value *AsVector =
20548 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20549 return AsVector;
20550 }
20551 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20552 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20553 return emitBuiltinWithOneOverloadedType<4>(*this, E,
20554 Intrinsic::amdgcn_bitop3);
20555 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20556 return emitBuiltinWithOneOverloadedType<4>(
20557 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20558 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20559 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20560 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20561 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20562 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20563 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20564 return emitBuiltinWithOneOverloadedType<5>(
20565 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20566 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20567 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20568 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20569 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20570 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20571 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20572 llvm::Type *RetTy = nullptr;
20573 switch (BuiltinID) {
20574 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20575 RetTy = Int8Ty;
20576 break;
20577 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20578 RetTy = Int16Ty;
20579 break;
20580 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20581 RetTy = Int32Ty;
20582 break;
20583 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20584 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20585 break;
20586 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20587 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20588 break;
20589 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20590 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20591 break;
20592 }
20593 Function *F =
20594 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20595 return Builder.CreateCall(
20596 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20597 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20598 }
20599 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20600 return emitBuiltinWithOneOverloadedType<2>(
20601 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20602 default:
20603 return nullptr;
20604 }
20605}
20606
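// Emit SPIR-V builtins (__builtin_spirv_*) by lowering them directly to the
// corresponding spv.* intrinsics.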
20607 Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
20608 const CallExpr *E) {
20609 switch (BuiltinID) {
20610 case SPIRV::BI__builtin_spirv_distance: {
20611 Value *X = EmitScalarExpr(E->getArg(0));
20612 Value *Y = EmitScalarExpr(E->getArg(1));
20613 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20614 E->getArg(1)->getType()->hasFloatingRepresentation() &&
20615 "Distance operands must have a float representation");
20616 assert(E->getArg(0)->getType()->isVectorType() &&
20617 E->getArg(1)->getType()->isVectorType() &&
20618 "Distance operands must be a vector");
20619 return Builder.CreateIntrinsic(
20620 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
20621 ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
20622 }
20623 case SPIRV::BI__builtin_spirv_length: {
20624 Value *X = EmitScalarExpr(E->getArg(0));
20625 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20626 "length operand must have a float representation");
20627 assert(E->getArg(0)->getType()->isVectorType() &&
20628 "length operand must be a vector");
20629 return Builder.CreateIntrinsic(
20630 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_length,
20631 ArrayRef<Value *>{X}, nullptr, "spv.length");
20632 }
20633 }
20634 return nullptr;
20635}
20636
20637/// Handle a SystemZ function in which the final argument is a pointer
20638/// to an int that receives the post-instruction CC value. At the LLVM level
20639/// this is represented as a function that returns a {result, cc} pair.
20640 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20641 unsigned IntrinsicID,
20642 const CallExpr *E) {
20643 unsigned NumArgs = E->getNumArgs() - 1;
20644 SmallVector<Value *, 8> Args(NumArgs);
20645 for (unsigned I = 0; I < NumArgs; ++I)
20646 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20647 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20648 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20649 Value *Call = CGF.Builder.CreateCall(F, Args);
20650 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20651 CGF.Builder.CreateStore(CC, CCPtr);
20652 return CGF.Builder.CreateExtractValue(Call, 0);
20653}
20654
20655 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20656 const CallExpr *E) {
20657 switch (BuiltinID) {
20658 case SystemZ::BI__builtin_tbegin: {
20659 Value *TDB = EmitScalarExpr(E->getArg(0));
20660 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20661 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20662 return Builder.CreateCall(F, {TDB, Control});
20663 }
20664 case SystemZ::BI__builtin_tbegin_nofloat: {
20665 Value *TDB = EmitScalarExpr(E->getArg(0));
20666 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20667 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20668 return Builder.CreateCall(F, {TDB, Control});
20669 }
20670 case SystemZ::BI__builtin_tbeginc: {
20671 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20672 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20673 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20674 return Builder.CreateCall(F, {TDB, Control});
20675 }
20676 case SystemZ::BI__builtin_tabort: {
20677 Value *Data = EmitScalarExpr(E->getArg(0));
20678 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20679 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20680 }
20681 case SystemZ::BI__builtin_non_tx_store: {
20682 Value *Address = EmitScalarExpr(E->getArg(0));
20683 Value *Data = EmitScalarExpr(E->getArg(1));
20684 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20685 return Builder.CreateCall(F, {Data, Address});
20686 }
20687
20688 // Vector builtins. Note that most vector builtins are mapped automatically
20689 // to target-specific LLVM intrinsics. The ones handled specially here can
20690 // be represented via standard LLVM IR, which is preferable to enable common
20691 // LLVM optimizations.
20692
20693 case SystemZ::BI__builtin_s390_vclzb:
20694 case SystemZ::BI__builtin_s390_vclzh:
20695 case SystemZ::BI__builtin_s390_vclzf:
20696 case SystemZ::BI__builtin_s390_vclzg:
20697 case SystemZ::BI__builtin_s390_vclzq: {
20698 llvm::Type *ResultType = ConvertType(E->getType());
20699 Value *X = EmitScalarExpr(E->getArg(0));
20700 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20701 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20702 return Builder.CreateCall(F, {X, Undef});
20703 }
20704
20705 case SystemZ::BI__builtin_s390_vctzb:
20706 case SystemZ::BI__builtin_s390_vctzh:
20707 case SystemZ::BI__builtin_s390_vctzf:
20708 case SystemZ::BI__builtin_s390_vctzg:
20709 case SystemZ::BI__builtin_s390_vctzq: {
20710 llvm::Type *ResultType = ConvertType(E->getType());
20711 Value *X = EmitScalarExpr(E->getArg(0));
20712 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20713 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20714 return Builder.CreateCall(F, {X, Undef});
20715 }
20716
20717 case SystemZ::BI__builtin_s390_verllb:
20718 case SystemZ::BI__builtin_s390_verllh:
20719 case SystemZ::BI__builtin_s390_verllf:
20720 case SystemZ::BI__builtin_s390_verllg: {
20721 llvm::Type *ResultType = ConvertType(E->getType());
20722 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20723 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20724 // Splat scalar rotate amount to vector type.
20725 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20726 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20727 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20728 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20729 return Builder.CreateCall(F, { Src, Src, Amt });
20730 }
20731
20732 case SystemZ::BI__builtin_s390_verllvb:
20733 case SystemZ::BI__builtin_s390_verllvh:
20734 case SystemZ::BI__builtin_s390_verllvf:
20735 case SystemZ::BI__builtin_s390_verllvg: {
20736 llvm::Type *ResultType = ConvertType(E->getType());
20737 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20738 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20739 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20740 return Builder.CreateCall(F, { Src, Src, Amt });
20741 }
20742
20743 case SystemZ::BI__builtin_s390_vfsqsb:
20744 case SystemZ::BI__builtin_s390_vfsqdb: {
20745 llvm::Type *ResultType = ConvertType(E->getType());
20746 Value *X = EmitScalarExpr(E->getArg(0));
20747 if (Builder.getIsFPConstrained()) {
20748 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20749 return Builder.CreateConstrainedFPCall(F, { X });
20750 } else {
20751 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20752 return Builder.CreateCall(F, X);
20753 }
20754 }
20755 case SystemZ::BI__builtin_s390_vfmasb:
20756 case SystemZ::BI__builtin_s390_vfmadb: {
20757 llvm::Type *ResultType = ConvertType(E->getType());
20758 Value *X = EmitScalarExpr(E->getArg(0));
20759 Value *Y = EmitScalarExpr(E->getArg(1));
20760 Value *Z = EmitScalarExpr(E->getArg(2));
20761 if (Builder.getIsFPConstrained()) {
20762 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20763 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20764 } else {
20765 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20766 return Builder.CreateCall(F, {X, Y, Z});
20767 }
20768 }
20769 case SystemZ::BI__builtin_s390_vfmssb:
20770 case SystemZ::BI__builtin_s390_vfmsdb: {
20771 llvm::Type *ResultType = ConvertType(E->getType());
20772 Value *X = EmitScalarExpr(E->getArg(0));
20773 Value *Y = EmitScalarExpr(E->getArg(1));
20774 Value *Z = EmitScalarExpr(E->getArg(2));
20775 if (Builder.getIsFPConstrained()) {
20776 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20777 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20778 } else {
20779 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20780 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20781 }
20782 }
20783 case SystemZ::BI__builtin_s390_vfnmasb:
20784 case SystemZ::BI__builtin_s390_vfnmadb: {
20785 llvm::Type *ResultType = ConvertType(E->getType());
20786 Value *X = EmitScalarExpr(E->getArg(0));
20787 Value *Y = EmitScalarExpr(E->getArg(1));
20788 Value *Z = EmitScalarExpr(E->getArg(2));
20789 if (Builder.getIsFPConstrained()) {
20790 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20791 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20792 } else {
20793 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20794 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20795 }
20796 }
20797 case SystemZ::BI__builtin_s390_vfnmssb:
20798 case SystemZ::BI__builtin_s390_vfnmsdb: {
20799 llvm::Type *ResultType = ConvertType(E->getType());
20800 Value *X = EmitScalarExpr(E->getArg(0));
20801 Value *Y = EmitScalarExpr(E->getArg(1));
20802 Value *Z = EmitScalarExpr(E->getArg(2));
20803 if (Builder.getIsFPConstrained()) {
20804 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20805 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20806 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20807 } else {
20808 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20809 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20810 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20811 }
20812 }
20813 case SystemZ::BI__builtin_s390_vflpsb:
20814 case SystemZ::BI__builtin_s390_vflpdb: {
20815 llvm::Type *ResultType = ConvertType(E->getType());
20816 Value *X = EmitScalarExpr(E->getArg(0));
20817 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20818 return Builder.CreateCall(F, X);
20819 }
20820 case SystemZ::BI__builtin_s390_vflnsb:
20821 case SystemZ::BI__builtin_s390_vflndb: {
20822 llvm::Type *ResultType = ConvertType(E->getType());
20823 Value *X = EmitScalarExpr(E->getArg(0));
20824 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20825 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20826 }
20827 case SystemZ::BI__builtin_s390_vfisb:
20828 case SystemZ::BI__builtin_s390_vfidb: {
20829 llvm::Type *ResultType = ConvertType(E->getType());
20830 Value *X = EmitScalarExpr(E->getArg(0));
20831 // Constant-fold the M4 and M5 mask arguments.
20832 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20833 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20834 // Check whether this instance can be represented via an LLVM standard
20835 // intrinsic. We only support some combinations of M4 and M5.
20836 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20837 Intrinsic::ID CI;
20838 switch (M4.getZExtValue()) {
20839 default: break;
20840 case 0: // IEEE-inexact exception allowed
20841 switch (M5.getZExtValue()) {
20842 default: break;
20843 case 0: ID = Intrinsic::rint;
20844 CI = Intrinsic::experimental_constrained_rint; break;
20845 }
20846 break;
20847 case 4: // IEEE-inexact exception suppressed
20848 switch (M5.getZExtValue()) {
20849 default: break;
20850 case 0: ID = Intrinsic::nearbyint;
20851 CI = Intrinsic::experimental_constrained_nearbyint; break;
20852 case 1: ID = Intrinsic::round;
20853 CI = Intrinsic::experimental_constrained_round; break;
20854 case 5: ID = Intrinsic::trunc;
20855 CI = Intrinsic::experimental_constrained_trunc; break;
20856 case 6: ID = Intrinsic::ceil;
20857 CI = Intrinsic::experimental_constrained_ceil; break;
20858 case 7: ID = Intrinsic::floor;
20859 CI = Intrinsic::experimental_constrained_floor; break;
20860 }
20861 break;
20862 }
20863 if (ID != Intrinsic::not_intrinsic) {
20864 if (Builder.getIsFPConstrained()) {
20865 Function *F = CGM.getIntrinsic(CI, ResultType);
20866 return Builder.CreateConstrainedFPCall(F, X);
20867 } else {
20868 Function *F = CGM.getIntrinsic(ID, ResultType);
20869 return Builder.CreateCall(F, X);
20870 }
20871 }
20872 switch (BuiltinID) { // FIXME: constrained version?
20873 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20874 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20875 default: llvm_unreachable("Unknown BuiltinID");
20876 }
20877 Function *F = CGM.getIntrinsic(ID);
20878 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20879 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20880 return Builder.CreateCall(F, {X, M4Value, M5Value});
20881 }
20882 case SystemZ::BI__builtin_s390_vfmaxsb:
20883 case SystemZ::BI__builtin_s390_vfmaxdb: {
20884 llvm::Type *ResultType = ConvertType(E->getType());
20885 Value *X = EmitScalarExpr(E->getArg(0));
20886 Value *Y = EmitScalarExpr(E->getArg(1));
20887 // Constant-fold the M4 mask argument.
20888 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20889 // Check whether this instance can be represented via an LLVM standard
20890 // intrinsic. We only support some values of M4.
20891 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20892 Intrinsic::ID CI;
20893 switch (M4.getZExtValue()) {
20894 default: break;
20895 case 4: ID = Intrinsic::maxnum;
20896 CI = Intrinsic::experimental_constrained_maxnum; break;
20897 }
20898 if (ID != Intrinsic::not_intrinsic) {
20899 if (Builder.getIsFPConstrained()) {
20900 Function *F = CGM.getIntrinsic(CI, ResultType);
20901 return Builder.CreateConstrainedFPCall(F, {X, Y});
20902 } else {
20903 Function *F = CGM.getIntrinsic(ID, ResultType);
20904 return Builder.CreateCall(F, {X, Y});
20905 }
20906 }
20907 switch (BuiltinID) {
20908 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20909 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20910 default: llvm_unreachable("Unknown BuiltinID");
20911 }
20912 Function *F = CGM.getIntrinsic(ID);
20913 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20914 return Builder.CreateCall(F, {X, Y, M4Value});
20915 }
20916 case SystemZ::BI__builtin_s390_vfminsb:
20917 case SystemZ::BI__builtin_s390_vfmindb: {
20918 llvm::Type *ResultType = ConvertType(E->getType());
20919 Value *X = EmitScalarExpr(E->getArg(0));
20920 Value *Y = EmitScalarExpr(E->getArg(1));
20921 // Constant-fold the M4 mask argument.
20922 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20923 // Check whether this instance can be represented via an LLVM standard
20924 // intrinsic. We only support some values of M4.
20925 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20926 Intrinsic::ID CI;
20927 switch (M4.getZExtValue()) {
20928 default: break;
20929 case 4: ID = Intrinsic::minnum;
20930 CI = Intrinsic::experimental_constrained_minnum; break;
20931 }
20932 if (ID != Intrinsic::not_intrinsic) {
20933 if (Builder.getIsFPConstrained()) {
20934 Function *F = CGM.getIntrinsic(CI, ResultType);
20935 return Builder.CreateConstrainedFPCall(F, {X, Y});
20936 } else {
20937 Function *F = CGM.getIntrinsic(ID, ResultType);
20938 return Builder.CreateCall(F, {X, Y});
20939 }
20940 }
20941 switch (BuiltinID) {
20942 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20943 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20944 default: llvm_unreachable("Unknown BuiltinID");
20945 }
20946 Function *F = CGM.getIntrinsic(ID);
20947 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20948 return Builder.CreateCall(F, {X, Y, M4Value});
20949 }
20950
20951 case SystemZ::BI__builtin_s390_vlbrh:
20952 case SystemZ::BI__builtin_s390_vlbrf:
20953 case SystemZ::BI__builtin_s390_vlbrg:
20954 case SystemZ::BI__builtin_s390_vlbrq: {
20955 llvm::Type *ResultType = ConvertType(E->getType());
20956 Value *X = EmitScalarExpr(E->getArg(0));
20957 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20958 return Builder.CreateCall(F, X);
20959 }
20960
20961 // Vector intrinsics that output the post-instruction CC value.
20962
20963#define INTRINSIC_WITH_CC(NAME) \
20964 case SystemZ::BI__builtin_##NAME: \
20965 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20966
20967 INTRINSIC_WITH_CC(s390_vpkshs);
20968 INTRINSIC_WITH_CC(s390_vpksfs);
20969 INTRINSIC_WITH_CC(s390_vpksgs);
20970
20971 INTRINSIC_WITH_CC(s390_vpklshs);
20972 INTRINSIC_WITH_CC(s390_vpklsfs);
20973 INTRINSIC_WITH_CC(s390_vpklsgs);
20974
20975 INTRINSIC_WITH_CC(s390_vceqbs);
20976 INTRINSIC_WITH_CC(s390_vceqhs);
20977 INTRINSIC_WITH_CC(s390_vceqfs);
20978 INTRINSIC_WITH_CC(s390_vceqgs);
20979 INTRINSIC_WITH_CC(s390_vceqqs);
20980
20981 INTRINSIC_WITH_CC(s390_vchbs);
20982 INTRINSIC_WITH_CC(s390_vchhs);
20983 INTRINSIC_WITH_CC(s390_vchfs);
20984 INTRINSIC_WITH_CC(s390_vchgs);
20985 INTRINSIC_WITH_CC(s390_vchqs);
20986
20987 INTRINSIC_WITH_CC(s390_vchlbs);
20988 INTRINSIC_WITH_CC(s390_vchlhs);
20989 INTRINSIC_WITH_CC(s390_vchlfs);
20990 INTRINSIC_WITH_CC(s390_vchlgs);
20991 INTRINSIC_WITH_CC(s390_vchlqs);
20992
20993 INTRINSIC_WITH_CC(s390_vfaebs);
20994 INTRINSIC_WITH_CC(s390_vfaehs);
20995 INTRINSIC_WITH_CC(s390_vfaefs);
20996
20997 INTRINSIC_WITH_CC(s390_vfaezbs);
20998 INTRINSIC_WITH_CC(s390_vfaezhs);
20999 INTRINSIC_WITH_CC(s390_vfaezfs);
21000
21001 INTRINSIC_WITH_CC(s390_vfeebs);
21002 INTRINSIC_WITH_CC(s390_vfeehs);
21003 INTRINSIC_WITH_CC(s390_vfeefs);
21004
21005 INTRINSIC_WITH_CC(s390_vfeezbs);
21006 INTRINSIC_WITH_CC(s390_vfeezhs);
21007 INTRINSIC_WITH_CC(s390_vfeezfs);
21008
21009 INTRINSIC_WITH_CC(s390_vfenebs);
21010 INTRINSIC_WITH_CC(s390_vfenehs);
21011 INTRINSIC_WITH_CC(s390_vfenefs);
21012
21013 INTRINSIC_WITH_CC(s390_vfenezbs);
21014 INTRINSIC_WITH_CC(s390_vfenezhs);
21015 INTRINSIC_WITH_CC(s390_vfenezfs);
21016
21017 INTRINSIC_WITH_CC(s390_vistrbs);
21018 INTRINSIC_WITH_CC(s390_vistrhs);
21019 INTRINSIC_WITH_CC(s390_vistrfs);
21020
21021 INTRINSIC_WITH_CC(s390_vstrcbs);
21022 INTRINSIC_WITH_CC(s390_vstrchs);
21023 INTRINSIC_WITH_CC(s390_vstrcfs);
21024
21025 INTRINSIC_WITH_CC(s390_vstrczbs);
21026 INTRINSIC_WITH_CC(s390_vstrczhs);
21027 INTRINSIC_WITH_CC(s390_vstrczfs);
21028
21029 INTRINSIC_WITH_CC(s390_vfcesbs);
21030 INTRINSIC_WITH_CC(s390_vfcedbs);
21031 INTRINSIC_WITH_CC(s390_vfchsbs);
21032 INTRINSIC_WITH_CC(s390_vfchdbs);
21033 INTRINSIC_WITH_CC(s390_vfchesbs);
21034 INTRINSIC_WITH_CC(s390_vfchedbs);
21035
21036 INTRINSIC_WITH_CC(s390_vftcisb);
21037 INTRINSIC_WITH_CC(s390_vftcidb);
21038
21039 INTRINSIC_WITH_CC(s390_vstrsb);
21040 INTRINSIC_WITH_CC(s390_vstrsh);
21041 INTRINSIC_WITH_CC(s390_vstrsf);
21042
21043 INTRINSIC_WITH_CC(s390_vstrszb);
21044 INTRINSIC_WITH_CC(s390_vstrszh);
21045 INTRINSIC_WITH_CC(s390_vstrszf);
21046
21047#undef INTRINSIC_WITH_CC
21048
21049 default:
21050 return nullptr;
21051 }
21052}
21053
21054namespace {
21055// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
21056struct NVPTXMmaLdstInfo {
21057 unsigned NumResults; // Number of elements to load/store
21058 // Intrinsic IDs for row/col variants. 0 if a particular layout is unsupported.
21059 unsigned IID_col;
21060 unsigned IID_row;
21061};
21062
21063#define MMA_INTR(geom_op_type, layout) \
21064 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
21065#define MMA_LDST(n, geom_op_type) \
21066 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
21067
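// Map an MMA load/store builtin to the number of elements it transfers and
// the intrinsic IDs of its col/row layout variants.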
21068static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
21069 switch (BuiltinID) {
21070 // FP MMA loads
21071 case NVPTX::BI__hmma_m16n16k16_ld_a:
21072 return MMA_LDST(8, m16n16k16_load_a_f16);
21073 case NVPTX::BI__hmma_m16n16k16_ld_b:
21074 return MMA_LDST(8, m16n16k16_load_b_f16);
21075 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21076 return MMA_LDST(4, m16n16k16_load_c_f16);
21077 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21078 return MMA_LDST(8, m16n16k16_load_c_f32);
21079 case NVPTX::BI__hmma_m32n8k16_ld_a:
21080 return MMA_LDST(8, m32n8k16_load_a_f16);
21081 case NVPTX::BI__hmma_m32n8k16_ld_b:
21082 return MMA_LDST(8, m32n8k16_load_b_f16);
21083 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21084 return MMA_LDST(4, m32n8k16_load_c_f16);
21085 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21086 return MMA_LDST(8, m32n8k16_load_c_f32);
21087 case NVPTX::BI__hmma_m8n32k16_ld_a:
21088 return MMA_LDST(8, m8n32k16_load_a_f16);
21089 case NVPTX::BI__hmma_m8n32k16_ld_b:
21090 return MMA_LDST(8, m8n32k16_load_b_f16);
21091 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21092 return MMA_LDST(4, m8n32k16_load_c_f16);
21093 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21094 return MMA_LDST(8, m8n32k16_load_c_f32);
21095
21096 // Integer MMA loads
21097 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21098 return MMA_LDST(2, m16n16k16_load_a_s8);
21099 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21100 return MMA_LDST(2, m16n16k16_load_a_u8);
21101 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21102 return MMA_LDST(2, m16n16k16_load_b_s8);
21103 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21104 return MMA_LDST(2, m16n16k16_load_b_u8);
21105 case NVPTX::BI__imma_m16n16k16_ld_c:
21106 return MMA_LDST(8, m16n16k16_load_c_s32);
21107 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21108 return MMA_LDST(4, m32n8k16_load_a_s8);
21109 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21110 return MMA_LDST(4, m32n8k16_load_a_u8);
21111 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21112 return MMA_LDST(1, m32n8k16_load_b_s8);
21113 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21114 return MMA_LDST(1, m32n8k16_load_b_u8);
21115 case NVPTX::BI__imma_m32n8k16_ld_c:
21116 return MMA_LDST(8, m32n8k16_load_c_s32);
21117 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21118 return MMA_LDST(1, m8n32k16_load_a_s8);
21119 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21120 return MMA_LDST(1, m8n32k16_load_a_u8);
21121 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21122 return MMA_LDST(4, m8n32k16_load_b_s8);
21123 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21124 return MMA_LDST(4, m8n32k16_load_b_u8);
21125 case NVPTX::BI__imma_m8n32k16_ld_c:
21126 return MMA_LDST(8, m8n32k16_load_c_s32);
21127
21128 // Sub-integer MMA loads.
21129 // Only row/col layout is supported by A/B fragments.
21130 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21131 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
21132 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21133 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
21134 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21135 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
21136 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21137 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
21138 case NVPTX::BI__imma_m8n8k32_ld_c:
21139 return MMA_LDST(2, m8n8k32_load_c_s32);
21140 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21141 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
21142 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21143 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
21144 case NVPTX::BI__bmma_m8n8k128_ld_c:
21145 return MMA_LDST(2, m8n8k128_load_c_s32);
21146
21147 // Double MMA loads
21148 case NVPTX::BI__dmma_m8n8k4_ld_a:
21149 return MMA_LDST(1, m8n8k4_load_a_f64);
21150 case NVPTX::BI__dmma_m8n8k4_ld_b:
21151 return MMA_LDST(1, m8n8k4_load_b_f64);
21152 case NVPTX::BI__dmma_m8n8k4_ld_c:
21153 return MMA_LDST(2, m8n8k4_load_c_f64);
21154
21155 // Alternate float MMA loads
21156 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21157 return MMA_LDST(4, m16n16k16_load_a_bf16);
21158 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21159 return MMA_LDST(4, m16n16k16_load_b_bf16);
21160 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21161 return MMA_LDST(2, m8n32k16_load_a_bf16);
21162 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21163 return MMA_LDST(8, m8n32k16_load_b_bf16);
21164 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21165 return MMA_LDST(8, m32n8k16_load_a_bf16);
21166 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21167 return MMA_LDST(2, m32n8k16_load_b_bf16);
21168 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21169 return MMA_LDST(4, m16n16k8_load_a_tf32);
21170 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21171 return MMA_LDST(4, m16n16k8_load_b_tf32);
21172 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
21173 return MMA_LDST(8, m16n16k8_load_c_f32);
21174
21175 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
21176 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
21177 // use fragment C for both loads and stores.
21178 // FP MMA stores.
21179 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21180 return MMA_LDST(4, m16n16k16_store_d_f16);
21181 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21182 return MMA_LDST(8, m16n16k16_store_d_f32);
21183 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21184 return MMA_LDST(4, m32n8k16_store_d_f16);
21185 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21186 return MMA_LDST(8, m32n8k16_store_d_f32);
21187 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21188 return MMA_LDST(4, m8n32k16_store_d_f16);
21189 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21190 return MMA_LDST(8, m8n32k16_store_d_f32);
21191
21192 // Integer and sub-integer MMA stores.
21193 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
21194 // name, integer loads/stores use LLVM's i32.
21195 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21196 return MMA_LDST(8, m16n16k16_store_d_s32);
21197 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21198 return MMA_LDST(8, m32n8k16_store_d_s32);
21199 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21200 return MMA_LDST(8, m8n32k16_store_d_s32);
21201 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21202 return MMA_LDST(2, m8n8k32_store_d_s32);
21203 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21204 return MMA_LDST(2, m8n8k128_store_d_s32);
21205
21206 // Double MMA store
21207 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21208 return MMA_LDST(2, m8n8k4_store_d_f64);
21209
21210 // Alternate float MMA store
21211 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21212 return MMA_LDST(8, m16n16k8_store_d_f32);
21213
21214 default:
21215 llvm_unreachable("Unknown MMA builtin");
21216 }
21217}
21218#undef MMA_LDST
21219#undef MMA_INTR
21220
21221
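// Per-builtin description of an MMA operation: the number of elements in each
// fragment (A, B, C, D) and the table of layout/satf intrinsic variants.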
21222struct NVPTXMmaInfo {
21223 unsigned NumEltsA;
21224 unsigned NumEltsB;
21225 unsigned NumEltsC;
21226 unsigned NumEltsD;
21227
21228 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
21229 // over 'col' for layout. The index of non-satf variants is expected to match
21230 // the undocumented layout constants used by CUDA's mma.hpp.
21231 std::array<unsigned, 8> Variants;
21232
21233 unsigned getMMAIntrinsic(int Layout, bool Satf) {
21234 unsigned Index = Layout + 4 * Satf;
21235 if (Index >= Variants.size())
21236 return 0;
21237 return Variants[Index];
21238 }
21239};
21240
21241 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21242 // Layout and Satf, 0 otherwise.
21243static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21244 // clang-format off
21245#define MMA_VARIANTS(geom, type) \
21246 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21247 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21248 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21249 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21250#define MMA_SATF_VARIANTS(geom, type) \
21251 MMA_VARIANTS(geom, type), \
21252 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21253 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21254 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21255 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21256// Sub-integer MMA only supports row.col layout.
21257#define MMA_VARIANTS_I4(geom, type) \
21258 0, \
21259 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21260 0, \
21261 0, \
21262 0, \
21263 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21264 0, \
21265 0
21266// b1 MMA does not support .satfinite.
21267#define MMA_VARIANTS_B1_XOR(geom, type) \
21268 0, \
21269 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21270 0, \
21271 0, \
21272 0, \
21273 0, \
21274 0, \
21275 0
21276#define MMA_VARIANTS_B1_AND(geom, type) \
21277 0, \
21278 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21279 0, \
21280 0, \
21281 0, \
21282 0, \
21283 0, \
21284 0
21285 // clang-format on
21286 switch (BuiltinID) {
21287 // FP MMA
21288 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21289 // the NumEltsN fields of the return value are ordered as A,B,C,D.
21290 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21291 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21292 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21293 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21294 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21295 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21296 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21297 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21298 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21299 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21300 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21301 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21302 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21303 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21304 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21305 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21306 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21307 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21308 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21309 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21310 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21311 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21312 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21313 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21314
21315 // Integer MMA
21316 case NVPTX::BI__imma_m16n16k16_mma_s8:
21317 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21318 case NVPTX::BI__imma_m16n16k16_mma_u8:
21319 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21320 case NVPTX::BI__imma_m32n8k16_mma_s8:
21321 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21322 case NVPTX::BI__imma_m32n8k16_mma_u8:
21323 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21324 case NVPTX::BI__imma_m8n32k16_mma_s8:
21325 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21326 case NVPTX::BI__imma_m8n32k16_mma_u8:
21327 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21328
21329 // Sub-integer MMA
21330 case NVPTX::BI__imma_m8n8k32_mma_s4:
21331 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21332 case NVPTX::BI__imma_m8n8k32_mma_u4:
21333 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21334 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21335 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21336 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21337 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21338
21339 // Double MMA
21340 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21341 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21342
21343 // Alternate FP MMA
21344 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21345 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21346 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21347 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21348 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21349 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21350 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21351 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21352 default:
21353 llvm_unreachable("Unexpected builtin ID.");
21354 }
21355#undef MMA_VARIANTS
21356#undef MMA_SATF_VARIANTS
21357#undef MMA_VARIANTS_I4
21358#undef MMA_VARIANTS_B1_AND
21359#undef MMA_VARIANTS_B1_XOR
21360}
21361
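// Emit an NVVM ldu (load via the read-only data cache) intrinsic call, passing
// the natural alignment of the pointee type as the alignment operand.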
21362static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21363 const CallExpr *E) {
21364 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21365 QualType ArgType = E->getArg(0)->getType();
21366 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21367 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21368 return CGF.Builder.CreateCall(
21369 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21370 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21371}
21372
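// Emit __nvvm_ldg_* as a plain load from the global address space
// (addrspace(1)), annotated with !invariant.load metadata.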
21373static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21374 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21375 QualType ArgType = E->getArg(0)->getType();
21376 clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21377 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21378
21379 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21380 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21381 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21382 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21383 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21384
21385 return LD;
21386}
21387
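// Emit a scoped (cta/sys) NVVM atomic intrinsic, overloaded on the pointee
// type and the pointer type of its first operand.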
21388static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21389 const CallExpr *E) {
21390 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21391 llvm::Type *ElemTy =
21392 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21393 return CGF.Builder.CreateCall(
21394 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21395 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21396}
21397
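// Emit a cp.async builtin; the three-argument form selects the intrinsic
// variant that takes an explicit source size operand.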
21398static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21399 CodeGenFunction &CGF, const CallExpr *E,
21400 int SrcSize) {
21401 return E->getNumArgs() == 3
21402 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21403 {CGF.EmitScalarExpr(E->getArg(0)),
21404 CGF.EmitScalarExpr(E->getArg(1)),
21405 CGF.EmitScalarExpr(E->getArg(2))})
21406 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21407 {CGF.EmitScalarExpr(E->getArg(0)),
21408 CGF.EmitScalarExpr(E->getArg(1))});
21409}
21410
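// Emit an NVPTX half-precision builtin: diagnose missing native half support,
// handle the ldg/ldu forms specially, and otherwise forward the arguments
// (bitcast where needed) to the given intrinsic.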
21411static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21412 const CallExpr *E, CodeGenFunction &CGF) {
21413 auto &C = CGF.CGM.getContext();
21414 if (!(C.getLangOpts().NativeHalfType ||
21415 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21416 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21417 " requires native half type support.");
21418 return nullptr;
21419 }
21420
21421 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21422 return MakeLdg(CGF, E);
21423
21424 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21425 return MakeLdu(IntrinsicID, CGF, E);
21426
21427 SmallVector<Value *, 16> Args;
21428 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21429 auto *FTy = F->getFunctionType();
21430 unsigned ICEArguments = 0;
21431 ASTContext::GetBuiltinTypeError Error;
21432 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21433 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21434 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21435 assert((ICEArguments & (1 << i)) == 0);
21436 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21437 auto *PTy = FTy->getParamType(i);
21438 if (PTy != ArgValue->getType())
21439 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21440 Args.push_back(ArgValue);
21441 }
21442
21443 return CGF.Builder.CreateCall(F, Args);
21444}
21445} // namespace
21446
21447 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21448 const CallExpr *E) {
21449 switch (BuiltinID) {
21450 case NVPTX::BI__nvvm_atom_add_gen_i:
21451 case NVPTX::BI__nvvm_atom_add_gen_l:
21452 case NVPTX::BI__nvvm_atom_add_gen_ll:
21453 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21454
21455 case NVPTX::BI__nvvm_atom_sub_gen_i:
21456 case NVPTX::BI__nvvm_atom_sub_gen_l:
21457 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21458 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21459
21460 case NVPTX::BI__nvvm_atom_and_gen_i:
21461 case NVPTX::BI__nvvm_atom_and_gen_l:
21462 case NVPTX::BI__nvvm_atom_and_gen_ll:
21463 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21464
21465 case NVPTX::BI__nvvm_atom_or_gen_i:
21466 case NVPTX::BI__nvvm_atom_or_gen_l:
21467 case NVPTX::BI__nvvm_atom_or_gen_ll:
21468 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21469
21470 case NVPTX::BI__nvvm_atom_xor_gen_i:
21471 case NVPTX::BI__nvvm_atom_xor_gen_l:
21472 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21473 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21474
21475 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21476 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21477 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21478 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21479
21480 case NVPTX::BI__nvvm_atom_max_gen_i:
21481 case NVPTX::BI__nvvm_atom_max_gen_l:
21482 case NVPTX::BI__nvvm_atom_max_gen_ll:
21483 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21484
21485 case NVPTX::BI__nvvm_atom_max_gen_ui:
21486 case NVPTX::BI__nvvm_atom_max_gen_ul:
21487 case NVPTX::BI__nvvm_atom_max_gen_ull:
21488 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21489
21490 case NVPTX::BI__nvvm_atom_min_gen_i:
21491 case NVPTX::BI__nvvm_atom_min_gen_l:
21492 case NVPTX::BI__nvvm_atom_min_gen_ll:
21493 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21494
21495 case NVPTX::BI__nvvm_atom_min_gen_ui:
21496 case NVPTX::BI__nvvm_atom_min_gen_ul:
21497 case NVPTX::BI__nvvm_atom_min_gen_ull:
21498 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21499
21500 case NVPTX::BI__nvvm_atom_cas_gen_us:
21501 case NVPTX::BI__nvvm_atom_cas_gen_i:
21502 case NVPTX::BI__nvvm_atom_cas_gen_l:
21503 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21504 // __nvvm_atom_cas_gen_* should return the old value rather than the
21505 // success flag.
21506 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21507
21508 case NVPTX::BI__nvvm_atom_add_gen_f:
21509 case NVPTX::BI__nvvm_atom_add_gen_d: {
21510 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21511 Value *Val = EmitScalarExpr(E->getArg(1));
21512
21513 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21514 AtomicOrdering::SequentiallyConsistent);
21515 }
21516
21517 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21518 Value *Ptr = EmitScalarExpr(E->getArg(0));
21519 Value *Val = EmitScalarExpr(E->getArg(1));
21520 Function *FnALI32 =
21521 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21522 return Builder.CreateCall(FnALI32, {Ptr, Val});
21523 }
21524
21525 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21526 Value *Ptr = EmitScalarExpr(E->getArg(0));
21527 Value *Val = EmitScalarExpr(E->getArg(1));
21528 Function *FnALD32 =
21529 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21530 return Builder.CreateCall(FnALD32, {Ptr, Val});
21531 }
21532
21533 case NVPTX::BI__nvvm_ldg_c:
21534 case NVPTX::BI__nvvm_ldg_sc:
21535 case NVPTX::BI__nvvm_ldg_c2:
21536 case NVPTX::BI__nvvm_ldg_sc2:
21537 case NVPTX::BI__nvvm_ldg_c4:
21538 case NVPTX::BI__nvvm_ldg_sc4:
21539 case NVPTX::BI__nvvm_ldg_s:
21540 case NVPTX::BI__nvvm_ldg_s2:
21541 case NVPTX::BI__nvvm_ldg_s4:
21542 case NVPTX::BI__nvvm_ldg_i:
21543 case NVPTX::BI__nvvm_ldg_i2:
21544 case NVPTX::BI__nvvm_ldg_i4:
21545 case NVPTX::BI__nvvm_ldg_l:
21546 case NVPTX::BI__nvvm_ldg_l2:
21547 case NVPTX::BI__nvvm_ldg_ll:
21548 case NVPTX::BI__nvvm_ldg_ll2:
21549 case NVPTX::BI__nvvm_ldg_uc:
21550 case NVPTX::BI__nvvm_ldg_uc2:
21551 case NVPTX::BI__nvvm_ldg_uc4:
21552 case NVPTX::BI__nvvm_ldg_us:
21553 case NVPTX::BI__nvvm_ldg_us2:
21554 case NVPTX::BI__nvvm_ldg_us4:
21555 case NVPTX::BI__nvvm_ldg_ui:
21556 case NVPTX::BI__nvvm_ldg_ui2:
21557 case NVPTX::BI__nvvm_ldg_ui4:
21558 case NVPTX::BI__nvvm_ldg_ul:
21559 case NVPTX::BI__nvvm_ldg_ul2:
21560 case NVPTX::BI__nvvm_ldg_ull:
21561 case NVPTX::BI__nvvm_ldg_ull2:
21562 case NVPTX::BI__nvvm_ldg_f:
21563 case NVPTX::BI__nvvm_ldg_f2:
21564 case NVPTX::BI__nvvm_ldg_f4:
21565 case NVPTX::BI__nvvm_ldg_d:
21566 case NVPTX::BI__nvvm_ldg_d2:
21567 // PTX Interoperability section 2.2: "For a vector with an even number of
21568 // elements, its alignment is set to number of elements times the alignment
21569 // of its member: n*alignof(t)."
21570 return MakeLdg(*this, E);
21571
21572 case NVPTX::BI__nvvm_ldu_c:
21573 case NVPTX::BI__nvvm_ldu_sc:
21574 case NVPTX::BI__nvvm_ldu_c2:
21575 case NVPTX::BI__nvvm_ldu_sc2:
21576 case NVPTX::BI__nvvm_ldu_c4:
21577 case NVPTX::BI__nvvm_ldu_sc4:
21578 case NVPTX::BI__nvvm_ldu_s:
21579 case NVPTX::BI__nvvm_ldu_s2:
21580 case NVPTX::BI__nvvm_ldu_s4:
21581 case NVPTX::BI__nvvm_ldu_i:
21582 case NVPTX::BI__nvvm_ldu_i2:
21583 case NVPTX::BI__nvvm_ldu_i4:
21584 case NVPTX::BI__nvvm_ldu_l:
21585 case NVPTX::BI__nvvm_ldu_l2:
21586 case NVPTX::BI__nvvm_ldu_ll:
21587 case NVPTX::BI__nvvm_ldu_ll2:
21588 case NVPTX::BI__nvvm_ldu_uc:
21589 case NVPTX::BI__nvvm_ldu_uc2:
21590 case NVPTX::BI__nvvm_ldu_uc4:
21591 case NVPTX::BI__nvvm_ldu_us:
21592 case NVPTX::BI__nvvm_ldu_us2:
21593 case NVPTX::BI__nvvm_ldu_us4:
21594 case NVPTX::BI__nvvm_ldu_ui:
21595 case NVPTX::BI__nvvm_ldu_ui2:
21596 case NVPTX::BI__nvvm_ldu_ui4:
21597 case NVPTX::BI__nvvm_ldu_ul:
21598 case NVPTX::BI__nvvm_ldu_ul2:
21599 case NVPTX::BI__nvvm_ldu_ull:
21600 case NVPTX::BI__nvvm_ldu_ull2:
21601 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21602 case NVPTX::BI__nvvm_ldu_f:
21603 case NVPTX::BI__nvvm_ldu_f2:
21604 case NVPTX::BI__nvvm_ldu_f4:
21605 case NVPTX::BI__nvvm_ldu_d:
21606 case NVPTX::BI__nvvm_ldu_d2:
21607 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21608
21609 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21610 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21611 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21612 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21613 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21614 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21615 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21616 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21617 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21618 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21619 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21620 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21621 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21622 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21623 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21624 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21625 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21626 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21627 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21628 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21629 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21630 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21631 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21632 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21633 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21634 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21635 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21636 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21637 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21638 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21639 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21640 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21641 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21642 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21643 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21644 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21645 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21646 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21647 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21648 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21649 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21650 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21651 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21652 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21653 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21654 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21655 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21656 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21657 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21658 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21659 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21660 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21661 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21662 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21663 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21664 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21665 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21666 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21667 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21668 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21669 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21670 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21671 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21672 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21673 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21674 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21675 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21676 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21677 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21678 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21679 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21680 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21681 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21682 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21683 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21684 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21685 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21686 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21687 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21688 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21689 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21690 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21691 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21692 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21693 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21694 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21695 Value *Ptr = EmitScalarExpr(E->getArg(0));
21696 llvm::Type *ElemTy =
21697 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21698 return Builder.CreateCall(
21699 CGM.getIntrinsic(
21700 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21701 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21702 }
21703 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21704 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21705 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21706 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21707 Value *Ptr = EmitScalarExpr(E->getArg(0));
21708 llvm::Type *ElemTy =
21709 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21710 return Builder.CreateCall(
21711 CGM.getIntrinsic(
21712 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21713 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21714 }
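// Illustrative note (not in the upstream file): the __nvvm_atom_cta_* and
// __nvvm_atom_sys_* builtins handled above are the CTA- and system-scoped
// forms of the generic atomics; MakeScopedAtomic forwards the pointer and
// value operands to the matching *_cta / *_sys NVVM intrinsic, overloaded on
// the element and pointer types. Roughly:
//   __nvvm_atom_cta_add_gen_f(p, x)  -->  llvm.nvvm.atomic.add.gen.f.cta(p, x)
// where p and x are placeholder operands.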
21715 case NVPTX::BI__nvvm_match_all_sync_i32p:
21716 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21717 Value *Mask = EmitScalarExpr(E->getArg(0));
21718 Value *Val = EmitScalarExpr(E->getArg(1));
21719 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21720 Value *ResultPair = Builder.CreateCall(
21721 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21722 ? Intrinsic::nvvm_match_all_sync_i32p
21723 : Intrinsic::nvvm_match_all_sync_i64p),
21724 {Mask, Val});
21725 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21726 PredOutPtr.getElementType());
21727 Builder.CreateStore(Pred, PredOutPtr);
21728 return Builder.CreateExtractValue(ResultPair, 0);
21729 }
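// Illustrative usage (assumed, with placeholder names mask/val/pred): the
// match.all.sync intrinsics return a {value, predicate} pair; the code above
// stores the zero-extended predicate through the third argument and returns
// the value, so a source-level call looks roughly like
//   int pred;
//   unsigned eq = __nvvm_match_all_sync_i32p(mask, val, &pred);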
21730
21731 // FP MMA loads
21732 case NVPTX::BI__hmma_m16n16k16_ld_a:
21733 case NVPTX::BI__hmma_m16n16k16_ld_b:
21734 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21735 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21736 case NVPTX::BI__hmma_m32n8k16_ld_a:
21737 case NVPTX::BI__hmma_m32n8k16_ld_b:
21738 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21739 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21740 case NVPTX::BI__hmma_m8n32k16_ld_a:
21741 case NVPTX::BI__hmma_m8n32k16_ld_b:
21742 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21743 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21744 // Integer MMA loads.
21745 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21746 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21747 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21748 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21749 case NVPTX::BI__imma_m16n16k16_ld_c:
21750 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21751 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21752 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21753 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21754 case NVPTX::BI__imma_m32n8k16_ld_c:
21755 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21756 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21757 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21758 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21759 case NVPTX::BI__imma_m8n32k16_ld_c:
21760 // Sub-integer MMA loads.
21761 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21762 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21763 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21764 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21765 case NVPTX::BI__imma_m8n8k32_ld_c:
21766 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21767 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21768 case NVPTX::BI__bmma_m8n8k128_ld_c:
21769 // Double MMA loads.
21770 case NVPTX::BI__dmma_m8n8k4_ld_a:
21771 case NVPTX::BI__dmma_m8n8k4_ld_b:
21772 case NVPTX::BI__dmma_m8n8k4_ld_c:
21773 // Alternate float MMA loads.
21774 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21775 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21776 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21777 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21778 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21779 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21780 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21781 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21782 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21783 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21784 Value *Src = EmitScalarExpr(E->getArg(1));
21785 Value *Ldm = EmitScalarExpr(E->getArg(2));
21786 std::optional<llvm::APSInt> isColMajorArg =
21787 E->getArg(3)->getIntegerConstantExpr(getContext());
21788 if (!isColMajorArg)
21789 return nullptr;
21790 bool isColMajor = isColMajorArg->getSExtValue();
21791 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21792 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21793 if (IID == 0)
21794 return nullptr;
21795
21796 Value *Result =
21797 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21798
21799 // Save returned values.
21800 assert(II.NumResults);
21801 if (II.NumResults == 1) {
21802 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21803 CharUnits::fromQuantity(4));
21804 } else {
21805 for (unsigned i = 0; i < II.NumResults; ++i) {
21806 Builder.CreateAlignedStore(
21807 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21808 Dst.getElementType()),
21809 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21810 llvm::ConstantInt::get(IntTy, i)),
21811 CharUnits::fromQuantity(4));
21812 }
21813 }
21814 return Result;
21815 }
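// Illustrative usage (assumed, with placeholder names dst/src/ldm): argument
// 3 of the MMA load builtins is a constant row/column-major flag selecting
// between the IID_row and IID_col intrinsic variants, e.g. roughly
//   __hmma_m16n16k16_ld_a(dst, src, ldm, /*isColMajor=*/0);
// If the flag is not an integer constant expression, or the combination is
// unsupported (IID == 0), nullptr is returned above.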
21816
21817 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21818 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21819 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21820 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21821 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21822 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21823 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21824 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21825 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21826 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21827 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21828 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21829 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21830 Value *Dst = EmitScalarExpr(E->getArg(0));
21831 Address Src = EmitPointerWithAlignment(E->getArg(1));
21832 Value *Ldm = EmitScalarExpr(E->getArg(2));
21833 std::optional<llvm::APSInt> isColMajorArg =
21834 E->getArg(3)->getIntegerConstantExpr(getContext());
21835 if (!isColMajorArg)
21836 return nullptr;
21837 bool isColMajor = isColMajorArg->getSExtValue();
21838 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21839 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21840 if (IID == 0)
21841 return nullptr;
21842 Function *Intrinsic =
21843 CGM.getIntrinsic(IID, Dst->getType());
21844 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21845 SmallVector<Value *, 10> Values = {Dst};
21846 for (unsigned i = 0; i < II.NumResults; ++i) {
21847 Value *V = Builder.CreateAlignedLoad(
21848 Src.getElementType(),
21849 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21850 llvm::ConstantInt::get(IntTy, i)),
21851 CharUnits::fromQuantity(4));
21852 Values.push_back(Builder.CreateBitCast(V, ParamType));
21853 }
21854 Values.push_back(Ldm);
21855 Value *Result = Builder.CreateCall(Intrinsic, Values);
21856 return Result;
21857 }
21858
21859 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21860 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21861 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21862 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21863 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21864 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21865 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21866 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21867 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21868 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21869 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21870 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21871 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21872 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21873 case NVPTX::BI__imma_m16n16k16_mma_s8:
21874 case NVPTX::BI__imma_m16n16k16_mma_u8:
21875 case NVPTX::BI__imma_m32n8k16_mma_s8:
21876 case NVPTX::BI__imma_m32n8k16_mma_u8:
21877 case NVPTX::BI__imma_m8n32k16_mma_s8:
21878 case NVPTX::BI__imma_m8n32k16_mma_u8:
21879 case NVPTX::BI__imma_m8n8k32_mma_s4:
21880 case NVPTX::BI__imma_m8n8k32_mma_u4:
21881 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21882 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21883 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21884 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21885 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21886 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21887 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21888 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21889 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21890 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21891 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21892 std::optional<llvm::APSInt> LayoutArg =
21893 E->getArg(4)->getIntegerConstantExpr(getContext());
21894 if (!LayoutArg)
21895 return nullptr;
21896 int Layout = LayoutArg->getSExtValue();
21897 if (Layout < 0 || Layout > 3)
21898 return nullptr;
21899 llvm::APSInt SatfArg;
21900 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21901 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21902 SatfArg = 0; // .b1 does not have a satf argument.
21903 else if (std::optional<llvm::APSInt> OptSatfArg =
21904 E->getArg(5)->getIntegerConstantExpr(getContext()))
21905 SatfArg = *OptSatfArg;
21906 else
21907 return nullptr;
21908 bool Satf = SatfArg.getSExtValue();
21909 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21910 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21911 if (IID == 0) // Unsupported combination of Layout/Satf.
21912 return nullptr;
21913
21914 SmallVector<Value *, 24> Values;
21915 Function *Intrinsic = CGM.getIntrinsic(IID);
21916 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21917 // Load A
21918 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21919 Value *V = Builder.CreateAlignedLoad(
21920 SrcA.getElementType(),
21921 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21922 llvm::ConstantInt::get(IntTy, i)),
21923 CharUnits::fromQuantity(4));
21924 Values.push_back(Builder.CreateBitCast(V, AType));
21925 }
21926 // Load B
21927 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21928 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21929 Value *V = Builder.CreateAlignedLoad(
21930 SrcB.getElementType(),
21931 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21932 llvm::ConstantInt::get(IntTy, i)),
21933 CharUnits::fromQuantity(4));
21934 Values.push_back(Builder.CreateBitCast(V, BType));
21935 }
21936 // Load C
21937 llvm::Type *CType =
21938 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21939 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21940 Value *V = Builder.CreateAlignedLoad(
21941 SrcC.getElementType(),
21942 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21943 llvm::ConstantInt::get(IntTy, i)),
21944 CharUnits::fromQuantity(4));
21945 Values.push_back(Builder.CreateBitCast(V, CType));
21946 }
21947 Value *Result = Builder.CreateCall(Intrinsic, Values);
21948 llvm::Type *DType = Dst.getElementType();
21949 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21950 Builder.CreateAlignedStore(
21951 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21952 Builder.CreateGEP(DType, Dst.emitRawPointer(*this),
21953 llvm::ConstantInt::get(IntTy, i)),
21954 CharUnits::fromQuantity(4));
21955 return Result;
21956 }
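// Summary of the case above (no new behavior): the MMA compute builtins are
// lowered by loading NumEltsA fragments of A, then NumEltsB of B, then
// NumEltsC of C into Values, calling the intrinsic selected by
// getMMAIntrinsic(Layout, Satf), and storing the NumEltsD results back
// through the destination pointer.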
21957 // The following builtins require half type support
21958 case NVPTX::BI__nvvm_ex2_approx_f16:
21959 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21960 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21961 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21962 case NVPTX::BI__nvvm_ff2f16x2_rn:
21963 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21964 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21965 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21966 case NVPTX::BI__nvvm_ff2f16x2_rz:
21967 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21968 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21969 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21970 case NVPTX::BI__nvvm_fma_rn_f16:
21971 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21972 case NVPTX::BI__nvvm_fma_rn_f16x2:
21973 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21974 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21975 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21976 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21977 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21978 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21979 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21980 *this);
21981 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21982 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21983 *this);
21984 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21985 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21986 *this);
21987 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21988 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21989 *this);
21990 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21991 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21992 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21993 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21994 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21995 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21996 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21997 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21998 case NVPTX::BI__nvvm_fmax_f16:
21999 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
22000 case NVPTX::BI__nvvm_fmax_f16x2:
22001 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
22002 case NVPTX::BI__nvvm_fmax_ftz_f16:
22003 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
22004 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
22005 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
22006 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
22007 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
22008 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
22009 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
22010 *this);
22011 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
22012 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
22013 E, *this);
22014 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
22015 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
22016 BuiltinID, E, *this);
22017 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
22018 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
22019 *this);
22020 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
22021 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
22022 E, *this);
22023 case NVPTX::BI__nvvm_fmax_nan_f16:
22024 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
22025 case NVPTX::BI__nvvm_fmax_nan_f16x2:
22026 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
22027 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
22028 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
22029 *this);
22030 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
22031 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
22032 E, *this);
22033 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
22034 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
22035 *this);
22036 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
22037 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
22038 *this);
22039 case NVPTX::BI__nvvm_fmin_f16:
22040 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
22041 case NVPTX::BI__nvvm_fmin_f16x2:
22042 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
22043 case NVPTX::BI__nvvm_fmin_ftz_f16:
22044 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
22045 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
22046 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
22047 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
22048 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
22049 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
22050 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
22051 *this);
22052 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
22053 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
22054 E, *this);
22055 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
22056 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
22057 BuiltinID, E, *this);
22058 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
22059 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
22060 *this);
22061 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
22062 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
22063 E, *this);
22064 case NVPTX::BI__nvvm_fmin_nan_f16:
22065 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
22066 case NVPTX::BI__nvvm_fmin_nan_f16x2:
22067 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
22068 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
22069 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
22070 *this);
22071 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
22072 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
22073 E, *this);
22074 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
22075 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
22076 *this);
22077 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
22078 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
22079 *this);
22080 case NVPTX::BI__nvvm_ldg_h:
22081 case NVPTX::BI__nvvm_ldg_h2:
22082 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
22083 case NVPTX::BI__nvvm_ldu_h:
22084 case NVPTX::BI__nvvm_ldu_h2:
22085 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
22086 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
22087 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
22088 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
22089 4);
22090 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
22091 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
22092 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
22093 8);
22094 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
22095 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
22096 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
22097 16);
22098 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
22099 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
22100 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
22101 16);
22102 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
22103 return Builder.CreateCall(
22104 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
22105 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
22106 return Builder.CreateCall(
22107 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
22108 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
22109 return Builder.CreateCall(
22110 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
22111 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
22112 return Builder.CreateCall(
22113 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
22114 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
22115 return Builder.CreateCall(
22116 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
22117 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
22118 return Builder.CreateCall(
22119 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
22120 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
22121 return Builder.CreateCall(
22122 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
22123 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
22124 return Builder.CreateCall(
22125 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
22126 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
22127 return Builder.CreateCall(
22128 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
22129 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
22130 return Builder.CreateCall(
22131 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
22132 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
22133 return Builder.CreateCall(
22134 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
22135 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
22136 return Builder.CreateCall(
22137 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
22138 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
22139 return Builder.CreateCall(
22140 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
22141 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
22142 return Builder.CreateCall(
22143 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
22144 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
22145 return Builder.CreateCall(
22146 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
22147 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
22148 return Builder.CreateCall(
22149 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
22150 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
22151 return Builder.CreateCall(
22152 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
22153 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
22154 return Builder.CreateCall(
22155 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
22156 case NVPTX::BI__nvvm_is_explicit_cluster:
22157 return Builder.CreateCall(
22158 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
22159 case NVPTX::BI__nvvm_isspacep_shared_cluster:
22160 return Builder.CreateCall(
22161 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
22162 EmitScalarExpr(E->getArg(0)));
22163 case NVPTX::BI__nvvm_mapa:
22164 return Builder.CreateCall(
22165 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
22166 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22167 case NVPTX::BI__nvvm_mapa_shared_cluster:
22168 return Builder.CreateCall(
22169 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
22170 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22171 case NVPTX::BI__nvvm_getctarank:
22172 return Builder.CreateCall(
22173 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
22174 EmitScalarExpr(E->getArg(0)));
22175 case NVPTX::BI__nvvm_getctarank_shared_cluster:
22176 return Builder.CreateCall(
22177 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
22178 EmitScalarExpr(E->getArg(0)));
22179 case NVPTX::BI__nvvm_barrier_cluster_arrive:
22180 return Builder.CreateCall(
22181 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
22182 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
22183 return Builder.CreateCall(
22184 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
22185 case NVPTX::BI__nvvm_barrier_cluster_wait:
22186 return Builder.CreateCall(
22187 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
22188 case NVPTX::BI__nvvm_fence_sc_cluster:
22189 return Builder.CreateCall(
22190 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
22191 default:
22192 return nullptr;
22193 }
22194}
22195
22196namespace {
22197struct BuiltinAlignArgs {
22198 llvm::Value *Src = nullptr;
22199 llvm::Type *SrcType = nullptr;
22200 llvm::Value *Alignment = nullptr;
22201 llvm::Value *Mask = nullptr;
22202 llvm::IntegerType *IntType = nullptr;
22203
22204 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22205 QualType AstType = E->getArg(0)->getType();
22206 if (AstType->isArrayType())
22207 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22208 else
22209 Src = CGF.EmitScalarExpr(E->getArg(0));
22210 SrcType = Src->getType();
22211 if (SrcType->isPointerTy()) {
22212 IntType = IntegerType::get(
22213 CGF.getLLVMContext(),
22214 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22215 } else {
22216 assert(SrcType->isIntegerTy());
22217 IntType = cast<llvm::IntegerType>(SrcType);
22218 }
22219 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22220 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22221 auto *One = llvm::ConstantInt::get(IntType, 1);
22222 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22223 }
22224};
22225} // namespace
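// Illustrative note (not in the upstream file): BuiltinAlignArgs normalizes
// the alignment operand to the width of the pointer index type (or to the
// integer operand's own type) and derives the low-bit mask, e.g. for an
// alignment of 16 on a typical 64-bit target:
//   Alignment = i64 16, Mask = i64 15
// so the helpers below only ever operate on Mask = Alignment - 1.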
22226
22227/// Generate (x & (y-1)) == 0.
22228RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22229 BuiltinAlignArgs Args(E, *this);
22230 llvm::Value *SrcAddress = Args.Src;
22231 if (Args.SrcType->isPointerTy())
22232 SrcAddress =
22233 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22234 return RValue::get(Builder.CreateICmpEQ(
22235 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22236 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22237}
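// Illustrative lowering (assumed source form, 64-bit target):
//   __builtin_is_aligned(p, 16)
// becomes roughly
//   %src_addr   = ptrtoint ptr %p to i64
//   %set_bits   = and i64 %src_addr, 15
//   %is_aligned = icmp eq i64 %set_bits, 0
// with the integer width taken from the target's pointer index type.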
22238
22239/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22240/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22241/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22242RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22243 BuiltinAlignArgs Args(E, *this);
22244 llvm::Value *SrcForMask = Args.Src;
22245 if (AlignUp) {
22246 // When aligning up we first add the mask to ensure we reach the next
22247 // alignment boundary, and then align down to the next valid multiple.
22248 // By adding the mask (rather than the full alignment), align_up on an
22249 // already aligned value will not change the value.
22250 if (Args.Src->getType()->isPointerTy()) {
22251 if (getLangOpts().isSignedOverflowDefined())
22252 SrcForMask =
22253 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22254 else
22255 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22256 /*SignedIndices=*/true,
22257 /*isSubtraction=*/false,
22258 E->getExprLoc(), "over_boundary");
22259 } else {
22260 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22261 }
22262 }
22263 // Invert the mask to only clear the lower bits.
22264 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22265 llvm::Value *Result = nullptr;
22266 if (Args.Src->getType()->isPointerTy()) {
22267 Result = Builder.CreateIntrinsic(
22268 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22269 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22270 } else {
22271 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22272 }
22273 assert(Result->getType() == Args.SrcType);
22274 return RValue::get(Result);
22275}
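// Illustrative lowering (assumed source forms, 16-byte alignment, mask 15):
//   __builtin_align_down(p, 16)  -->  llvm.ptrmask(p, ~15)
//   __builtin_align_up(p, 16)    -->  llvm.ptrmask(p + 15, ~15)
// where "p + 15" is the (possibly overflow-checked) byte GEP emitted above
// for pointer arguments; integer arguments use a plain add/and instead.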
22276
22277Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22278 const CallExpr *E) {
22279 switch (BuiltinID) {
22280 case WebAssembly::BI__builtin_wasm_memory_size: {
22281 llvm::Type *ResultType = ConvertType(E->getType());
22282 Value *I = EmitScalarExpr(E->getArg(0));
22283 Function *Callee =
22284 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22285 return Builder.CreateCall(Callee, I);
22286 }
22287 case WebAssembly::BI__builtin_wasm_memory_grow: {
22288 llvm::Type *ResultType = ConvertType(E->getType());
22289 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22290 EmitScalarExpr(E->getArg(1))};
22291 Function *Callee =
22292 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22293 return Builder.CreateCall(Callee, Args);
22294 }
22295 case WebAssembly::BI__builtin_wasm_tls_size: {
22296 llvm::Type *ResultType = ConvertType(E->getType());
22297 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22298 return Builder.CreateCall(Callee);
22299 }
22300 case WebAssembly::BI__builtin_wasm_tls_align: {
22301 llvm::Type *ResultType = ConvertType(E->getType());
22302 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22303 return Builder.CreateCall(Callee);
22304 }
22305 case WebAssembly::BI__builtin_wasm_tls_base: {
22306 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22307 return Builder.CreateCall(Callee);
22308 }
22309 case WebAssembly::BI__builtin_wasm_throw: {
22310 Value *Tag = EmitScalarExpr(E->getArg(0));
22311 Value *Obj = EmitScalarExpr(E->getArg(1));
22312 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22313 return Builder.CreateCall(Callee, {Tag, Obj});
22314 }
22315 case WebAssembly::BI__builtin_wasm_rethrow: {
22316 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22317 return Builder.CreateCall(Callee);
22318 }
22319 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22320 Value *Addr = EmitScalarExpr(E->getArg(0));
22321 Value *Expected = EmitScalarExpr(E->getArg(1));
22322 Value *Timeout = EmitScalarExpr(E->getArg(2));
22323 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22324 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22325 }
22326 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22327 Value *Addr = EmitScalarExpr(E->getArg(0));
22328 Value *Expected = EmitScalarExpr(E->getArg(1));
22329 Value *Timeout = EmitScalarExpr(E->getArg(2));
22330 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22331 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22332 }
22333 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22334 Value *Addr = EmitScalarExpr(E->getArg(0));
22335 Value *Count = EmitScalarExpr(E->getArg(1));
22336 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22337 return Builder.CreateCall(Callee, {Addr, Count});
22338 }
22339 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22340 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22341 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22342 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22343 Value *Src = EmitScalarExpr(E->getArg(0));
22344 llvm::Type *ResT = ConvertType(E->getType());
22345 Function *Callee =
22346 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22347 return Builder.CreateCall(Callee, {Src});
22348 }
22349 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22350 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22351 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22352 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22353 Value *Src = EmitScalarExpr(E->getArg(0));
22354 llvm::Type *ResT = ConvertType(E->getType());
22355 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22356 {ResT, Src->getType()});
22357 return Builder.CreateCall(Callee, {Src});
22358 }
22359 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22360 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22361 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22362 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22363 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22364 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22365 Value *Src = EmitScalarExpr(E->getArg(0));
22366 llvm::Type *ResT = ConvertType(E->getType());
22367 Function *Callee =
22368 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22369 return Builder.CreateCall(Callee, {Src});
22370 }
22371 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22372 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22373 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22374 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22375 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22376 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22377 Value *Src = EmitScalarExpr(E->getArg(0));
22378 llvm::Type *ResT = ConvertType(E->getType());
22379 Function *Callee =
22380 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22381 return Builder.CreateCall(Callee, {Src});
22382 }
22383 case WebAssembly::BI__builtin_wasm_min_f32:
22384 case WebAssembly::BI__builtin_wasm_min_f64:
22385 case WebAssembly::BI__builtin_wasm_min_f16x8:
22386 case WebAssembly::BI__builtin_wasm_min_f32x4:
22387 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22388 Value *LHS = EmitScalarExpr(E->getArg(0));
22389 Value *RHS = EmitScalarExpr(E->getArg(1));
22390 Function *Callee =
22391 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22392 return Builder.CreateCall(Callee, {LHS, RHS});
22393 }
22394 case WebAssembly::BI__builtin_wasm_max_f32:
22395 case WebAssembly::BI__builtin_wasm_max_f64:
22396 case WebAssembly::BI__builtin_wasm_max_f16x8:
22397 case WebAssembly::BI__builtin_wasm_max_f32x4:
22398 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22399 Value *LHS = EmitScalarExpr(E->getArg(0));
22400 Value *RHS = EmitScalarExpr(E->getArg(1));
22401 Function *Callee =
22402 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22403 return Builder.CreateCall(Callee, {LHS, RHS});
22404 }
22405 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22406 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22407 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22408 Value *LHS = EmitScalarExpr(E->getArg(0));
22409 Value *RHS = EmitScalarExpr(E->getArg(1));
22410 Function *Callee =
22411 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22412 return Builder.CreateCall(Callee, {LHS, RHS});
22413 }
22414 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22415 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22416 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22417 Value *LHS = EmitScalarExpr(E->getArg(0));
22418 Value *RHS = EmitScalarExpr(E->getArg(1));
22419 Function *Callee =
22420 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22421 return Builder.CreateCall(Callee, {LHS, RHS});
22422 }
22423 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22424 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22425 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22426 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22427 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22428 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22429 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22430 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22431 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22432 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22433 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22434 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22435 unsigned IntNo;
22436 switch (BuiltinID) {
22437 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22438 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22439 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22440 IntNo = Intrinsic::ceil;
22441 break;
22442 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22443 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22444 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22445 IntNo = Intrinsic::floor;
22446 break;
22447 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22448 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22449 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22450 IntNo = Intrinsic::trunc;
22451 break;
22452 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22453 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22454 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22455 IntNo = Intrinsic::nearbyint;
22456 break;
22457 default:
22458 llvm_unreachable("unexpected builtin ID");
22459 }
22460 Value *Value = EmitScalarExpr(E->getArg(0));
22461 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22462 return Builder.CreateCall(Callee, Value);
22463 }
22464 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22465 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22466 return Builder.CreateCall(Callee);
22467 }
22468 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22469 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22470 return Builder.CreateCall(Callee);
22471 }
22472 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22473 Value *Src = EmitScalarExpr(E->getArg(0));
22474 Value *Indices = EmitScalarExpr(E->getArg(1));
22475 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22476 return Builder.CreateCall(Callee, {Src, Indices});
22477 }
22478 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22479 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22480 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22481 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22482 Value *Vec = EmitScalarExpr(E->getArg(0));
22483 Value *Neg = Builder.CreateNeg(Vec, "neg");
22484 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22485 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22486 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22487 }
22488 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22489 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22490 Value *LHS = EmitScalarExpr(E->getArg(0));
22491 Value *RHS = EmitScalarExpr(E->getArg(1));
22492 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22493 ConvertType(E->getType()));
22494 return Builder.CreateCall(Callee, {LHS, RHS});
22495 }
22496 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22497 Value *LHS = EmitScalarExpr(E->getArg(0));
22498 Value *RHS = EmitScalarExpr(E->getArg(1));
22499 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22500 return Builder.CreateCall(Callee, {LHS, RHS});
22501 }
22502 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22503 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22504 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22505 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22506 Value *Vec = EmitScalarExpr(E->getArg(0));
22507 unsigned IntNo;
22508 switch (BuiltinID) {
22509 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22510 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22511 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22512 break;
22513 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22514 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22515 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22516 break;
22517 default:
22518 llvm_unreachable("unexpected builtin ID");
22519 }
22520
22521 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22522 return Builder.CreateCall(Callee, Vec);
22523 }
22524 case WebAssembly::BI__builtin_wasm_bitselect: {
22525 Value *V1 = EmitScalarExpr(E->getArg(0));
22526 Value *V2 = EmitScalarExpr(E->getArg(1));
22527 Value *C = EmitScalarExpr(E->getArg(2));
22528 Function *Callee =
22529 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22530 return Builder.CreateCall(Callee, {V1, V2, C});
22531 }
22532 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22533 Value *LHS = EmitScalarExpr(E->getArg(0));
22534 Value *RHS = EmitScalarExpr(E->getArg(1));
22535 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22536 return Builder.CreateCall(Callee, {LHS, RHS});
22537 }
22538 case WebAssembly::BI__builtin_wasm_any_true_v128:
22539 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22540 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22541 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22542 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22543 unsigned IntNo;
22544 switch (BuiltinID) {
22545 case WebAssembly::BI__builtin_wasm_any_true_v128:
22546 IntNo = Intrinsic::wasm_anytrue;
22547 break;
22548 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22549 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22550 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22551 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22552 IntNo = Intrinsic::wasm_alltrue;
22553 break;
22554 default:
22555 llvm_unreachable("unexpected builtin ID");
22556 }
22557 Value *Vec = EmitScalarExpr(E->getArg(0));
22558 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22559 return Builder.CreateCall(Callee, {Vec});
22560 }
22561 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22562 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22563 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22564 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22565 Value *Vec = EmitScalarExpr(E->getArg(0));
22566 Function *Callee =
22567 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22568 return Builder.CreateCall(Callee, {Vec});
22569 }
22570 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22571 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22572 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22573 Value *Vec = EmitScalarExpr(E->getArg(0));
22574 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22575 return Builder.CreateCall(Callee, {Vec});
22576 }
22577 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22578 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22579 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22580 Value *Vec = EmitScalarExpr(E->getArg(0));
22581 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22582 return Builder.CreateCall(Callee, {Vec});
22583 }
22584 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22585 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22586 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22587 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22588 Value *Low = EmitScalarExpr(E->getArg(0));
22589 Value *High = EmitScalarExpr(E->getArg(1));
22590 unsigned IntNo;
22591 switch (BuiltinID) {
22592 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22593 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22594 IntNo = Intrinsic::wasm_narrow_signed;
22595 break;
22596 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22597 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22598 IntNo = Intrinsic::wasm_narrow_unsigned;
22599 break;
22600 default:
22601 llvm_unreachable("unexpected builtin ID");
22602 }
22603 Function *Callee =
22604 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22605 return Builder.CreateCall(Callee, {Low, High});
22606 }
22607 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22608 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22609 Value *Vec = EmitScalarExpr(E->getArg(0));
22610 unsigned IntNo;
22611 switch (BuiltinID) {
22612 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22613 IntNo = Intrinsic::fptosi_sat;
22614 break;
22615 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22616 IntNo = Intrinsic::fptoui_sat;
22617 break;
22618 default:
22619 llvm_unreachable("unexpected builtin ID");
22620 }
22621 llvm::Type *SrcT = Vec->getType();
22622 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22623 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22624 Value *Trunc = Builder.CreateCall(Callee, Vec);
22625 Value *Splat = Constant::getNullValue(TruncT);
22626 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22627 }
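// Note on the case above: the f64x2 source yields only two i32 results, so
// the saturating conversion is widened back to an i32x4 by shuffling with a
// zero vector; the upper two lanes are zero, matching the "zero" suffix of
// these builtins.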
22628 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22629 Value *Ops[18];
22630 size_t OpIdx = 0;
22631 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22632 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22633 while (OpIdx < 18) {
22634 std::optional<llvm::APSInt> LaneConst =
22635 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22636 assert(LaneConst && "Constant arg isn't actually constant?");
22637 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22638 }
22639 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22640 return Builder.CreateCall(Callee, Ops);
22641 }
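// Note on the case above: __builtin_wasm_shuffle_i8x16 takes the two input
// vectors plus 16 lane indices, each of which must be an integer constant
// expression (checked before CodeGen; the assert merely documents it), e.g.
// an illustrative call with placeholder operands:
//   __builtin_wasm_shuffle_i8x16(a, b, 0, 1, 2, /* ... */ 15);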
22642 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22643 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22644 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22645 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22646 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22647 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22648 Value *A = EmitScalarExpr(E->getArg(0));
22649 Value *B = EmitScalarExpr(E->getArg(1));
22650 Value *C = EmitScalarExpr(E->getArg(2));
22651 unsigned IntNo;
22652 switch (BuiltinID) {
22653 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22654 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22655 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22656 IntNo = Intrinsic::wasm_relaxed_madd;
22657 break;
22658 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22659 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22660 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22661 IntNo = Intrinsic::wasm_relaxed_nmadd;
22662 break;
22663 default:
22664 llvm_unreachable("unexpected builtin ID");
22665 }
22666 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22667 return Builder.CreateCall(Callee, {A, B, C});
22668 }
22669 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22670 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22671 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22672 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22673 Value *A = EmitScalarExpr(E->getArg(0));
22674 Value *B = EmitScalarExpr(E->getArg(1));
22675 Value *C = EmitScalarExpr(E->getArg(2));
22676 Function *Callee =
22677 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22678 return Builder.CreateCall(Callee, {A, B, C});
22679 }
22680 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22681 Value *Src = EmitScalarExpr(E->getArg(0));
22682 Value *Indices = EmitScalarExpr(E->getArg(1));
22683 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22684 return Builder.CreateCall(Callee, {Src, Indices});
22685 }
22686 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22687 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22688 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22689 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22690 Value *LHS = EmitScalarExpr(E->getArg(0));
22691 Value *RHS = EmitScalarExpr(E->getArg(1));
22692 unsigned IntNo;
22693 switch (BuiltinID) {
22694 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22695 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22696 IntNo = Intrinsic::wasm_relaxed_min;
22697 break;
22698 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22699 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22700 IntNo = Intrinsic::wasm_relaxed_max;
22701 break;
22702 default:
22703 llvm_unreachable("unexpected builtin ID");
22704 }
22705 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22706 return Builder.CreateCall(Callee, {LHS, RHS});
22707 }
22708 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22709 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22710 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22711 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22712 Value *Vec = EmitScalarExpr(E->getArg(0));
22713 unsigned IntNo;
22714 switch (BuiltinID) {
22715 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22716 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22717 break;
22718 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22719 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22720 break;
22721 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22722 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22723 break;
22724 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22725 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22726 break;
22727 default:
22728 llvm_unreachable("unexpected builtin ID");
22729 }
22730 Function *Callee = CGM.getIntrinsic(IntNo);
22731 return Builder.CreateCall(Callee, {Vec});
22732 }
22733 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22734 Value *LHS = EmitScalarExpr(E->getArg(0));
22735 Value *RHS = EmitScalarExpr(E->getArg(1));
22736 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22737 return Builder.CreateCall(Callee, {LHS, RHS});
22738 }
22739 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22740 Value *LHS = EmitScalarExpr(E->getArg(0));
22741 Value *RHS = EmitScalarExpr(E->getArg(1));
22742 Function *Callee =
22743 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22744 return Builder.CreateCall(Callee, {LHS, RHS});
22745 }
22746 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22747 Value *LHS = EmitScalarExpr(E->getArg(0));
22748 Value *RHS = EmitScalarExpr(E->getArg(1));
22749 Value *Acc = EmitScalarExpr(E->getArg(2));
22750 Function *Callee =
22751 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22752 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22753 }
22754 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22755 Value *LHS = EmitScalarExpr(E->getArg(0));
22756 Value *RHS = EmitScalarExpr(E->getArg(1));
22757 Value *Acc = EmitScalarExpr(E->getArg(2));
22758 Function *Callee =
22759 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22760 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22761 }
22762 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22763 Value *Addr = EmitScalarExpr(E->getArg(0));
22764 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22765 return Builder.CreateCall(Callee, {Addr});
22766 }
22767 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22768 Value *Val = EmitScalarExpr(E->getArg(0));
22769 Value *Addr = EmitScalarExpr(E->getArg(1));
22770 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22771 return Builder.CreateCall(Callee, {Val, Addr});
22772 }
22773 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22774 Value *Val = EmitScalarExpr(E->getArg(0));
22775 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22776 return Builder.CreateCall(Callee, {Val});
22777 }
22778 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22779 Value *Vector = EmitScalarExpr(E->getArg(0));
22780 Value *Index = EmitScalarExpr(E->getArg(1));
22781 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22782 return Builder.CreateCall(Callee, {Vector, Index});
22783 }
22784 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22785 Value *Vector = EmitScalarExpr(E->getArg(0));
22786 Value *Index = EmitScalarExpr(E->getArg(1));
22787 Value *Val = EmitScalarExpr(E->getArg(2));
22788 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22789 return Builder.CreateCall(Callee, {Vector, Index, Val});
22790 }
22791 case WebAssembly::BI__builtin_wasm_table_get: {
22792 assert(E->getArg(0)->getType()->isArrayType());
22793 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22794 Value *Index = EmitScalarExpr(E->getArg(1));
22795 Function *Callee;
22796 if (E->getType().isWebAssemblyExternrefType())
22797 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22798 else if (E->getType().isWebAssemblyFuncrefType())
22799 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22800 else
22801 llvm_unreachable(
22802 "Unexpected reference type for __builtin_wasm_table_get");
22803 return Builder.CreateCall(Callee, {Table, Index});
22804 }
22805 case WebAssembly::BI__builtin_wasm_table_set: {
22806 assert(E->getArg(0)->getType()->isArrayType());
22807 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22808 Value *Index = EmitScalarExpr(E->getArg(1));
22809 Value *Val = EmitScalarExpr(E->getArg(2));
22810 Function *Callee;
22811 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22812 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22813 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22814 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22815 else
22816 llvm_unreachable(
22817 "Unexpected reference type for __builtin_wasm_table_set");
22818 return Builder.CreateCall(Callee, {Table, Index, Val});
22819 }
22820 case WebAssembly::BI__builtin_wasm_table_size: {
22821 assert(E->getArg(0)->getType()->isArrayType());
22822 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22823 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22824 return Builder.CreateCall(Callee, Value);
22825 }
22826 case WebAssembly::BI__builtin_wasm_table_grow: {
22827 assert(E->getArg(0)->getType()->isArrayType());
22828 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22829 Value *Val = EmitScalarExpr(E->getArg(1));
22830 Value *NElems = EmitScalarExpr(E->getArg(2));
22831
22832 Function *Callee;
22833 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22834 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22835 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22836 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22837 else
22838 llvm_unreachable(
22839 "Unexpected reference type for __builtin_wasm_table_grow");
22840
22841 return Builder.CreateCall(Callee, {Table, Val, NElems});
22842 }
22843 case WebAssembly::BI__builtin_wasm_table_fill: {
22844 assert(E->getArg(0)->getType()->isArrayType());
22845 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22846 Value *Index = EmitScalarExpr(E->getArg(1));
22847 Value *Val = EmitScalarExpr(E->getArg(2));
22848 Value *NElems = EmitScalarExpr(E->getArg(3));
22849
22850 Function *Callee;
22851 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22852 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22853 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22854 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22855 else
22856 llvm_unreachable(
22857 "Unexpected reference type for __builtin_wasm_table_fill");
22858
22859 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22860 }
22861 case WebAssembly::BI__builtin_wasm_table_copy: {
22862 assert(E->getArg(0)->getType()->isArrayType());
22863 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22864 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22865 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22866 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22867 Value *NElems = EmitScalarExpr(E->getArg(4));
22868
22869 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22870
22871 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22872 }
22873 default:
22874 return nullptr;
22875 }
22876}
22877
22878 static std::pair<Intrinsic::ID, unsigned>
22879getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22880 struct Info {
22881 unsigned BuiltinID;
22882 Intrinsic::ID IntrinsicID;
22883 unsigned VecLen;
22884 };
22885 static Info Infos[] = {
22886#define CUSTOM_BUILTIN_MAPPING(x,s) \
22887 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22888 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22889 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22890 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22891 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22892 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22893 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22894 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22895 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22896 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22897 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22898 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22899 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22900 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22901 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22902 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22903 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22904 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22905 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22906 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22907 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22908 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22909 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22910 // Legacy builtins that take a vector in place of a vector predicate.
22911 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22912 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22913 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22914 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22915 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22916 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22917 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22918 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22919#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22920#undef CUSTOM_BUILTIN_MAPPING
22921 };
22922
22923 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22924 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22925 (void)SortOnce;
22926
22927 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22928 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22929 return {Intrinsic::not_intrinsic, 0};
22930
22931 return {F->IntrinsicID, F->VecLen};
22932}
22933
22935Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22936 const CallExpr *E) {
22936 Intrinsic::ID ID;
22937 unsigned VecLen;
22938 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22939
22940 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22941 // The base pointer is passed by address, so it needs to be loaded.
22942 Address A = EmitPointerWithAlignment(E->getArg(0));
22943 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
22944 llvm::Value *Base = Builder.CreateLoad(BP);
22945 // The treatment of both loads and stores is the same: the arguments for
22946 // the builtin are the same as the arguments for the intrinsic.
22947 // Load:
22948 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22949 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22950 // Store:
22951 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22952 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22953 SmallVector<llvm::Value*,5> Ops = { Base };
22954 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22955 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22956
22957 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22958 // The load intrinsics generate two results (Value, NewBase), stores
22959 // generate one (NewBase). The new base address needs to be stored.
22960 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22961 : Result;
22962 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22963 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22964 llvm::Value *RetVal =
22965 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22966 if (IsLoad)
22967 RetVal = Builder.CreateExtractValue(Result, 0);
22968 return RetVal;
22969 };
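// Editorial example (illustrative only, not part of the upstream source):
// for a circular load such as
//   v = __builtin_HEXAGON_L2_loadri_pci(&base, inc, mod, start);
// MakeCircOp loads the current base pointer from &base, calls
// @llvm.hexagon.L2.loadri.pci on (Base, inc, mod, start), stores element 1
// of the {Value, NewBase} result back through &base, and returns element 0
// as the value of the builtin.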
22970
22971 // Handle the conversion of bit-reverse load intrinsics to bit code.
22972 // The intrinsic call after this function only reads from memory and the
22973 // write to memory is handled by the store instruction.
22974 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22975 // The intrinsic generates one result, which is the new value for the base
22976 // pointer. It needs to be returned. The result of the load instruction is
22977 // passed to the intrinsic by address, so the value needs to be stored.
22978 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22979
22980 // Expressions like &(*pt++) will be incremented per evaluation.
22981 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
22982 // per call.
22983 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22984 DestAddr = DestAddr.withElementType(Int8Ty);
22985 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22986
22987 // Operands are Base, Dest, Modifier.
22988 // The intrinsic format in LLVM IR is defined as
22989 // { ValueType, i8* } (i8*, i32).
22990 llvm::Value *Result = Builder.CreateCall(
22991 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22992
22993 // The value needs to be stored as the variable is passed by reference.
22994 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22995
22996 // The store needs to be truncated to fit the destination type.
22997 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
22998 // to be handled with stores of the respective destination type.
22999 DestVal = Builder.CreateTrunc(DestVal, DestTy);
23000
23001 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
23002 // The updated value of the base pointer is returned.
23003 return Builder.CreateExtractValue(Result, 1);
23004 };
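// Editorial example (illustrative only, not part of the upstream source):
// __builtin_brev_ldh (handled below) passes Intrinsic::hexagon_L2_loadrh_pbr
// and Int16Ty to MakeBrevLd; element 0 of the { ValueType, i8* } result is
// truncated to i16 and stored through the second builtin argument, while
// element 1, the updated base pointer, is returned.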
23005
23006 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
23007 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
23008 : Intrinsic::hexagon_V6_vandvrt;
23009 return Builder.CreateCall(CGM.getIntrinsic(ID),
23010 {Vec, Builder.getInt32(-1)});
23011 };
23012 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
23013 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
23014 : Intrinsic::hexagon_V6_vandqrt;
23015 return Builder.CreateCall(CGM.getIntrinsic(ID),
23016 {Pred, Builder.getInt32(-1)});
23017 };
23018
23019 switch (BuiltinID) {
23020 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
23021 // and the corresponding C/C++ builtins use loads/stores to update
23022 // the predicate.
23023 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
23024 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
23025 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
23026 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
23027 // Get the type from the 0-th argument.
23028 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
23029 Address PredAddr =
23030 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
23031 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
23032 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
23033 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
23034
23035 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
23036 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
23037 PredAddr.getAlignment());
23038 return Builder.CreateExtractValue(Result, 0);
23039 }
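// Editorial note (illustrative only, not part of the upstream source): at
// the source level a call such as
//   v = __builtin_HEXAGON_V6_vaddcarry(a, b, &q);
// therefore reads the carry predicate from *q (converted with V2Q), passes
// it as the third intrinsic operand, and writes the carry-out back to *q
// (converted with Q2V) before returning the vector result.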
23040 // These are identical to the builtins above, except they don't consume
23041 // input carry, only generate carry-out. Since they still produce two
23042 // outputs, generate the store of the predicate, but no load.
23043 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
23044 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
23045 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
23046 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
23047 // Get the type from the 0-th argument.
23048 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
23049 Address PredAddr =
23050 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
23051 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
23052 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
23053
23054 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
23055 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
23056 PredAddr.getAlignment());
23057 return Builder.CreateExtractValue(Result, 0);
23058 }
23059
23060 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
23061 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
23062 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
23063 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
23064 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
23065 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
23066 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
23067 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
23068 SmallVector<llvm::Value*,4> Ops;
23069 const Expr *PredOp = E->getArg(0);
23070 // There will be an implicit cast to a boolean vector. Strip it.
23071 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
23072 if (Cast->getCastKind() == CK_BitCast)
23073 PredOp = Cast->getSubExpr();
23074 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
23075 }
23076 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
23077 Ops.push_back(EmitScalarExpr(E->getArg(i)));
23078 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
23079 }
23080
23081 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
23082 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
23083 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
23084 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
23085 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
23086 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
23087 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
23088 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
23089 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
23090 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
23091 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
23092 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
23093 return MakeCircOp(ID, /*IsLoad=*/true);
23094 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
23095 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
23096 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
23097 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
23098 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
23099 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
23100 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
23101 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
23102 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
23103 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
23104 return MakeCircOp(ID, /*IsLoad=*/false);
23105 case Hexagon::BI__builtin_brev_ldub:
23106 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
23107 case Hexagon::BI__builtin_brev_ldb:
23108 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
23109 case Hexagon::BI__builtin_brev_lduh:
23110 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
23111 case Hexagon::BI__builtin_brev_ldh:
23112 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
23113 case Hexagon::BI__builtin_brev_ldw:
23114 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
23115 case Hexagon::BI__builtin_brev_ldd:
23116 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
23117 } // switch
23118
23119 return nullptr;
23120}
23121
23122Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
23123 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
23124 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
23125 return EmitRISCVCpuIs(CPUStr);
23126}
23127
23128Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
23129 llvm::Type *Int32Ty = Builder.getInt32Ty();
23130 llvm::Type *Int64Ty = Builder.getInt64Ty();
23131 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
23132 llvm::Constant *RISCVCPUModel =
23133 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
23134 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
23135
23136 auto loadRISCVCPUID = [&](unsigned Index) {
23137 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
23138 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
23139 Ptr, llvm::MaybeAlign());
23140 return CPUID;
23141 };
23142
23143 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
23144
23145 // Compare mvendorid.
23146 Value *VendorID = loadRISCVCPUID(0);
23147 Value *Result =
23148 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
23149
23150 // Compare marchid.
23151 Value *ArchID = loadRISCVCPUID(1);
23152 Result = Builder.CreateAnd(
23153 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
23154
23155 // Compare mimpid.
23156 Value *ImpID = loadRISCVCPUID(2);
23157 Result = Builder.CreateAnd(
23158 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
23159
23160 return Result;
23161}
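// Editorial sketch (illustrative pseudo-C, not part of the upstream source;
// the field names follow the comments above and are assumptions): on RISC-V,
// __builtin_cpu_is("name") therefore lowers to roughly
//   extern struct { uint32_t mvendorid; uint64_t marchid, mimpid; }
//       __riscv_cpu_model;
//   __riscv_cpu_model.mvendorid == Model.MVendorID &&
//   __riscv_cpu_model.marchid == Model.MArchID &&
//   __riscv_cpu_model.mimpid == Model.MImpID
// where Model is the table entry returned by llvm::RISCV::getCPUModel.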
23162
23163Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
23164 const CallExpr *E,
23165 ReturnValueSlot ReturnValue) {
23166
23167 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
23168 return EmitRISCVCpuSupports(E);
23169 if (BuiltinID == Builtin::BI__builtin_cpu_init)
23170 return EmitRISCVCpuInit();
23171 if (BuiltinID == Builtin::BI__builtin_cpu_is)
23172 return EmitRISCVCpuIs(E);
23173
23174 SmallVector<Value *, 4> Ops;
23175 llvm::Type *ResultType = ConvertType(E->getType());
23176
23177 // Find out if any arguments are required to be integer constant expressions.
23178 unsigned ICEArguments = 0;
23179 ASTContext::GetBuiltinTypeError Error;
23180 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
23181 if (Error == ASTContext::GE_Missing_type) {
23182 // Vector intrinsics don't have a type string.
23183 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
23184 BuiltinID <= clang::RISCV::LastRVVBuiltin);
23185 ICEArguments = 0;
23186 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
23187 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
23188 ICEArguments = 1 << 1;
23189 } else {
23190 assert(Error == ASTContext::GE_None && "Unexpected error");
23191 }
23192
23193 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
23194 ICEArguments |= (1 << 1);
23195 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
23196 ICEArguments |= (1 << 2);
23197
23198 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
23199 // Handle aggregate argument, namely RVV tuple types in segment load/store
23200 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
23201 LValue L = EmitAggExprToLValue(E->getArg(i));
23202 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
23203 Ops.push_back(AggValue);
23204 continue;
23205 }
23206 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
23207 }
23208
23209 Intrinsic::ID ID = Intrinsic::not_intrinsic;
23210 // The 0th bit simulates the `vta` of RVV
23211 // The 1st bit simulates the `vma` of RVV
23212 constexpr unsigned RVV_VTA = 0x1;
23213 constexpr unsigned RVV_VMA = 0x2;
23214 int PolicyAttrs = 0;
23215 bool IsMasked = false;
23216 // This is used by segment load/store to determine its LLVM type.
23217 unsigned SegInstSEW = 8;
23218
23219 // Required for overloaded intrinsics.
23220 SmallVector<llvm::Type *, 2> IntrinsicTypes;
23221 switch (BuiltinID) {
23222 default: llvm_unreachable("unexpected builtin ID");
23223 case RISCV::BI__builtin_riscv_orc_b_32:
23224 case RISCV::BI__builtin_riscv_orc_b_64:
23225 case RISCV::BI__builtin_riscv_clmul_32:
23226 case RISCV::BI__builtin_riscv_clmul_64:
23227 case RISCV::BI__builtin_riscv_clmulh_32:
23228 case RISCV::BI__builtin_riscv_clmulh_64:
23229 case RISCV::BI__builtin_riscv_clmulr_32:
23230 case RISCV::BI__builtin_riscv_clmulr_64:
23231 case RISCV::BI__builtin_riscv_xperm4_32:
23232 case RISCV::BI__builtin_riscv_xperm4_64:
23233 case RISCV::BI__builtin_riscv_xperm8_32:
23234 case RISCV::BI__builtin_riscv_xperm8_64:
23235 case RISCV::BI__builtin_riscv_brev8_32:
23236 case RISCV::BI__builtin_riscv_brev8_64:
23237 case RISCV::BI__builtin_riscv_zip_32:
23238 case RISCV::BI__builtin_riscv_unzip_32: {
23239 switch (BuiltinID) {
23240 default: llvm_unreachable("unexpected builtin ID");
23241 // Zbb
23242 case RISCV::BI__builtin_riscv_orc_b_32:
23243 case RISCV::BI__builtin_riscv_orc_b_64:
23244 ID = Intrinsic::riscv_orc_b;
23245 break;
23246
23247 // Zbc
23248 case RISCV::BI__builtin_riscv_clmul_32:
23249 case RISCV::BI__builtin_riscv_clmul_64:
23250 ID = Intrinsic::riscv_clmul;
23251 break;
23252 case RISCV::BI__builtin_riscv_clmulh_32:
23253 case RISCV::BI__builtin_riscv_clmulh_64:
23254 ID = Intrinsic::riscv_clmulh;
23255 break;
23256 case RISCV::BI__builtin_riscv_clmulr_32:
23257 case RISCV::BI__builtin_riscv_clmulr_64:
23258 ID = Intrinsic::riscv_clmulr;
23259 break;
23260
23261 // Zbkx
23262 case RISCV::BI__builtin_riscv_xperm8_32:
23263 case RISCV::BI__builtin_riscv_xperm8_64:
23264 ID = Intrinsic::riscv_xperm8;
23265 break;
23266 case RISCV::BI__builtin_riscv_xperm4_32:
23267 case RISCV::BI__builtin_riscv_xperm4_64:
23268 ID = Intrinsic::riscv_xperm4;
23269 break;
23270
23271 // Zbkb
23272 case RISCV::BI__builtin_riscv_brev8_32:
23273 case RISCV::BI__builtin_riscv_brev8_64:
23274 ID = Intrinsic::riscv_brev8;
23275 break;
23276 case RISCV::BI__builtin_riscv_zip_32:
23277 ID = Intrinsic::riscv_zip;
23278 break;
23279 case RISCV::BI__builtin_riscv_unzip_32:
23280 ID = Intrinsic::riscv_unzip;
23281 break;
23282 }
23283
23284 IntrinsicTypes = {ResultType};
23285 break;
23286 }
23287
23288 // Zk builtins
23289
23290 // Zknh
23291 case RISCV::BI__builtin_riscv_sha256sig0:
23292 ID = Intrinsic::riscv_sha256sig0;
23293 break;
23294 case RISCV::BI__builtin_riscv_sha256sig1:
23295 ID = Intrinsic::riscv_sha256sig1;
23296 break;
23297 case RISCV::BI__builtin_riscv_sha256sum0:
23298 ID = Intrinsic::riscv_sha256sum0;
23299 break;
23300 case RISCV::BI__builtin_riscv_sha256sum1:
23301 ID = Intrinsic::riscv_sha256sum1;
23302 break;
23303
23304 // Zksed
23305 case RISCV::BI__builtin_riscv_sm4ks:
23306 ID = Intrinsic::riscv_sm4ks;
23307 break;
23308 case RISCV::BI__builtin_riscv_sm4ed:
23309 ID = Intrinsic::riscv_sm4ed;
23310 break;
23311
23312 // Zksh
23313 case RISCV::BI__builtin_riscv_sm3p0:
23314 ID = Intrinsic::riscv_sm3p0;
23315 break;
23316 case RISCV::BI__builtin_riscv_sm3p1:
23317 ID = Intrinsic::riscv_sm3p1;
23318 break;
23319
23320 case RISCV::BI__builtin_riscv_clz_32:
23321 case RISCV::BI__builtin_riscv_clz_64: {
23322 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23323 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23324 if (Result->getType() != ResultType)
23325 Result =
23326 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23327 return Result;
23328 }
23329 case RISCV::BI__builtin_riscv_ctz_32:
23330 case RISCV::BI__builtin_riscv_ctz_64: {
23331 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23332 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23333 if (Result->getType() != ResultType)
23334 Result =
23335 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23336 return Result;
23337 }
23338
23339 // Zihintntl
23340 case RISCV::BI__builtin_riscv_ntl_load: {
23341 llvm::Type *ResTy = ConvertType(E->getType());
23342 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23343 if (Ops.size() == 2)
23344 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23345
23346 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23347 getLLVMContext(),
23348 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23349 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23350 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23351
23352 int Width;
23353 if(ResTy->isScalableTy()) {
23354 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23355 llvm::Type *ScalarTy = ResTy->getScalarType();
23356 Width = ScalarTy->getPrimitiveSizeInBits() *
23357 SVTy->getElementCount().getKnownMinValue();
23358 } else
23359 Width = ResTy->getPrimitiveSizeInBits();
23360 LoadInst *Load = Builder.CreateLoad(
23361 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23362
23363 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23364 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23365 RISCVDomainNode);
23366
23367 return Load;
23368 }
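// Editorial example (illustrative only, not part of the upstream source):
// for a fixed-size result such as i64, Width is 64 and the load above gets
// an assumed alignment of Width / 8 = 8 bytes; for a scalable result like
// <vscale x 4 x i32>, Width is the known minimum 4 * 32 = 128 bits, giving
// a 16-byte alignment.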
23369 case RISCV::BI__builtin_riscv_ntl_store: {
23370 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23371 if (Ops.size() == 3)
23372 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23373
23374 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23375 getLLVMContext(),
23376 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23377 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23378 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23379
23380 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23381 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23382 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23383 RISCVDomainNode);
23384
23385 return Store;
23386 }
23387 // XCValu
23388 case RISCV::BI__builtin_riscv_cv_alu_addN:
23389 ID = Intrinsic::riscv_cv_alu_addN;
23390 break;
23391 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23392 ID = Intrinsic::riscv_cv_alu_addRN;
23393 break;
23394 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23395 ID = Intrinsic::riscv_cv_alu_adduN;
23396 break;
23397 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23398 ID = Intrinsic::riscv_cv_alu_adduRN;
23399 break;
23400 case RISCV::BI__builtin_riscv_cv_alu_clip:
23401 ID = Intrinsic::riscv_cv_alu_clip;
23402 break;
23403 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23404 ID = Intrinsic::riscv_cv_alu_clipu;
23405 break;
23406 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23407 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23408 "extbs");
23409 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23410 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23411 "extbz");
23412 case RISCV::BI__builtin_riscv_cv_alu_exths:
23413 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23414 "exths");
23415 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23416 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23417 "exthz");
23418 case RISCV::BI__builtin_riscv_cv_alu_slet:
23419 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23420 "sle");
23421 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23422 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23423 "sleu");
23424 case RISCV::BI__builtin_riscv_cv_alu_subN:
23425 ID = Intrinsic::riscv_cv_alu_subN;
23426 break;
23427 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23428 ID = Intrinsic::riscv_cv_alu_subRN;
23429 break;
23430 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23431 ID = Intrinsic::riscv_cv_alu_subuN;
23432 break;
23433 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23434 ID = Intrinsic::riscv_cv_alu_subuRN;
23435 break;
23436
23437 // Vector builtins are handled from here.
23438#include "clang/Basic/riscv_vector_builtin_cg.inc"
23439
23440 // SiFive Vector builtins are handled from here.
23441#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23442 }
23443
23444 assert(ID != Intrinsic::not_intrinsic);
23445
23446 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23447 return Builder.CreateCall(F, Ops, "");
23448}
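// Editorial example (illustrative only, not part of the upstream source):
// for a scalar builtin such as __builtin_riscv_orc_b_32 the switch above
// selects ID = Intrinsic::riscv_orc_b and IntrinsicTypes = {ResultType}, so
// the final CreateCall emits an overloaded @llvm.riscv.orc.b call
// specialized on the 32-bit result type with the single emitted operand.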
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3460
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8876
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9712
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:377
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1414
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6902
static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch, CGHLSLRuntime &RT, QualType QT)
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:569
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2307
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2273
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6715
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2764
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9682
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:1029
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9675
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7917
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9902
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7929
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7899
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8944
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2642
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:622
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:967
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:672
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6898
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7930
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1611
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7934
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:1089
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:791
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2670
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:646
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:997
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9671
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7931
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9749
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6597
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2007
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7741
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6895
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:143
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:689
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:429
static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo)
Checks no arguments or results are passed indirectly in the ABI (i.e.
Definition: CGBuiltin.cpp:903
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition: CGBuiltin.cpp:250
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9776
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1848
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1697
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1475
static Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:766
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6907
@ UnsignedAlts
Definition: CGBuiltin.cpp:6865
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6870
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6874
@ Use64BitVectors
Definition: CGBuiltin.cpp:6867
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6862
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6872
@ InventFloatType
Definition: CGBuiltin.cpp:6864
@ AddRetType
Definition: CGBuiltin.cpp:6857
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6859
@ VectorizeRetType
Definition: CGBuiltin.cpp:6861
@ VectorRet
Definition: CGBuiltin.cpp:6871
@ Add1ArgType
Definition: CGBuiltin.cpp:6858
@ Use128BitVectors
Definition: CGBuiltin.cpp:6868
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:869
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:860
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2500
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:1037
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1550
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9738
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:726
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:985
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2554
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2693
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6673
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:420
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9764
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:348
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8855
static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:827
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:79
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8847
@ VolatileRead
Definition: CGBuiltin.cpp:8849
@ NormalRead
Definition: CGBuiltin.cpp:8848
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:514
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:359
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2542
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:474
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:706
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:337
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9704
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7926
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7226
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:401
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:952
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7992
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:779
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:809
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:658
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2775
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1424
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2508
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:745
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:914
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:213
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:412
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1460
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8774
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2267
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:633
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:102
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7928
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7501
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
unsigned Iter
Definition: HTMLLogger.cpp:153
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:144
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition: SemaHLSL.cpp:48
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
C Language Family Type Representation.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:489
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
IdentifierTable & Idents
Definition: ASTContext.h:680
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:682
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1169
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2203
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2770
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2489
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2391
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2394
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3578
QualType getElementType() const
Definition: Type.h:3590
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition: SyncScope.h:273
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:150
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:248
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:252
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:123
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2874
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:858
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:898
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:915
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:389
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:381
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:405
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:398
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:219
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:437
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:346
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
MutableArrayRef< ArgInfo > arguments()
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:305
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID, bool NoMerge=false)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
llvm::Value * EmitCheckedArgForAssume(const Expr *E)
Emits an argument for a call to a __builtin_assume.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
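As an illustration of when these checks fire (a sketch, not code from this file): compiled with -fsanitize=alignment,null, ordinary memory accesses receive TCK_Load/TCK_Store checks.

// The load of *p gets a TCK_Load check and the store through q gets a
// TCK_Store check; a null or misaligned pointer reaches the UBSan handler.
int copy_first(const int *p, int *q) {
  int v = *p;   // TCK_Load
  *q = v;       // TCK_Store
  return v;
}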
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitFP8NeonCall(unsigned IID, ArrayRef< llvm::Type * > Tys, SmallVectorImpl< llvm::Value * > &O, const CallExpr *E, const char *name)
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns the calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerKind::SanitizerOrdinal > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
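The rotate builtins this helper lowers map onto the llvm.fshl/llvm.fshr funnel-shift intrinsics. A minimal usage sketch (rol32/ror64 are illustrative names):

#include <cstdint>

// Rotates lower to llvm.fshl (left) / llvm.fshr (right) with both value
// operands equal to x.
uint32_t rol32(uint32_t x, uint32_t n) { return __builtin_rotateleft32(x, n); }
uint64_t ror64(uint64_t x, uint64_t n) { return __builtin_rotateright64(x, n); }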
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
llvm::Value * EmitRISCVCpuIs(const CallExpr *E)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
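One common source of such assumptions is __builtin_assume_aligned (others include the assume_aligned and alloc_align attributes). A minimal sketch; sum4 is an illustrative name, and the exact IR form (llvm.assume with an align bundle vs. an explicit check under -fsanitize=alignment) depends on build options:

// Promises the optimizer that 'src' is 64-byte aligned.
float sum4(const float *src) {
  const float *p = static_cast<const float *>(__builtin_assume_aligned(src, 64));
  return p[0] + p[1] + p[2] + p[3];
}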
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitFP8NeonFDOTCall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
llvm::Type * ConvertType(QualType T)
llvm::Value * EmitFP8NeonFMLACall(unsigned IID, bool ExtendLaneArg, llvm::Type *RetTy, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
void EmitWritebacks(const CallArgList &Args)
EmitWritebacks - Emit the argument writebacks for a call.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitFP8NeonCvtCall(unsigned IID, llvm::Type *Ty0, llvm::Type *Ty1, bool Extract, SmallVectorImpl< llvm::Value * > &Ops, const CallExpr *E, const char *name)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2920
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:263
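For example (an illustrative sketch, not taken from this file), a library builtin that is not specially lowered is emitted as a plain call to the underlying C library function obtained through this mapping:

// Unless otherwise transformed, __builtin_printf compiles to an ordinary
// call to printf.
void greet(const char *name) {
  __builtin_printf("hello, %s\n", name);
}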
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
llvm::Value * getAggregatePointer(QualType PointeeType, CodeGenFunction &CGF) const
Definition: CGValue.h:88
bool isScalar() const
Definition: CGValue.h:64
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:386
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:170
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:160
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:142
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:179
Complex values, per C99 6.2.5p11.
Definition: Type.h:3146
Represents a concrete matrix type with a constant number of rows and columns.
Definition: Type.h:4233
Represents a sugar type with __counted_by or __sized_by annotations, including their _or_null variant...
Definition: Type.h:3307
DynamicCountPointerKind getKind() const
Definition: Type.h:3337
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2036
T * getAttr() const
Definition: DeclBase.h:576
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:596
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:254
DeclContext * getDeclContext()
Definition: DeclBase.h:451
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:440
bool hasAttr() const
Definition: DeclBase.h:580
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3101
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3096
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3092
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3593
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3076
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3969
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:276
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
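This is the evaluator behind __builtin_object_size-style queries. A minimal source-level sketch of what it tries to fold (known_size is an illustrative name):

// With a statically known allocation the query folds to a constant (40 here);
// when the size cannot be determined it yields (size_t)-1 or 0 depending on
// the 'type' argument.
char storage[40];

unsigned long known_size() {
  return __builtin_object_size(storage, 0);   // folds to 40
}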
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:225
Represents difference between two FPOptions values.
Definition: LangOptions.h:979
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4726
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3653
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5108
@ SME_PStateSMEnabledMask
Definition: Type.h:4588
@ SME_PStateSMCompatibleMask
Definition: Type.h:4589
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5425
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:296
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL 2.0 pipe type.
Definition: Type.h:7786
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3199
QualType getPointeeType() const
Definition: Type.h:3209
A (possibly-)qualified type.
Definition: Type.h:929
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:8021
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2891
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8063
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2887
The collection of all-type qualifiers we support.
Definition: Type.h:324
Represents a struct/union/class.
Definition: Decl.h:4162
field_range fields() const
Definition: Decl.h:4376
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool setsFPMR() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:346
bool isUnion() const
Definition: Decl.h:3784
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee? This is a fundamental language change,...
Definition: TargetCXXABI.h:188
Exposes information about the current target.
Definition: TargetInfo.h:220
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:311
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:697
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1262
bool isLittleEndian() const
Definition: TargetInfo.h:1673
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:861
bool isBigEndian() const
Definition: TargetInfo.h:1672
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1333
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1679
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:733
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isBlockPointerType() const
Definition: Type.h:8206
bool isVoidType() const
Definition: Type.h:8516
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:710
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition: Type.cpp:2055
bool isArrayType() const
Definition: Type.h:8264
bool isCountAttributedType() const
Definition: Type.cpp:727
bool isPointerType() const
Definition: Type.h:8192
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8560
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8810
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8635
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2270
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2220
bool isObjCObjectPointerType() const
Definition: Type.h:8334
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2292
bool isVectorType() const
Definition: Type.h:8304
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8741
bool isRecordType() const
Definition: Type.h:8292
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2513
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2579
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4035
unsigned getNumElements() const
Definition: Type.h:4050
QualType getElementType() const
Definition: Type.h:4049
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: FixedPoint.h:19
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1212
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2350
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:447
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:726
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1693
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2126
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
RangeSelector name(std::string ID)
Given a node with a "name" (like NamedDecl, DeclRefExpr, CxxCtorInitializer, and TypeLoc), selects th...
The JSON file list parser is used to communicate input to InstallAPI.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
Definition: SyncScope.h:42
llvm::StringRef getAsString(SyncScope S)
Definition: SyncScope.h:60
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
int int32_t
unsigned long uint64_t
long int64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:190
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:182
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:169
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742