1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <optional>
72#include <utility>
73
74using namespace clang;
75using namespace CodeGen;
76using namespace llvm;
77
78static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
79 Align AlignmentInBytes) {
80 ConstantInt *Byte;
81 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
82 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
83 // Nothing to initialize.
84 return;
85 case LangOptions::TrivialAutoVarInitKind::Zero:
86 Byte = CGF.Builder.getInt8(0x00);
87 break;
88 case LangOptions::TrivialAutoVarInitKind::Pattern: {
89 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
90 Byte = llvm::dyn_cast<llvm::ConstantInt>(
91 initializationPatternFor(CGF.CGM, Int8));
92 break;
93 }
94 }
95 if (CGF.CGM.stopAutoInit())
96 return;
97 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
98 I->addAnnotationMetadata("auto-init");
99}
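// For illustration: with -ftrivial-auto-var-init=pattern, a call such as
//   void *p = __builtin_alloca(n);
// is followed by a memset that fills the n bytes with the pattern byte
// (typically 0xAA), and the memset is tagged with the "auto-init"
// annotation so later passes can recognize these compiler-inserted
// initializations; with =zero the fill byte is 0x00 instead.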
100
102 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
103
104 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
105 Value *CMP;
106 Value *LastInstr;
107
108 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
109 FZeroConst = ConstantVector::getSplat(
110 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
111 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
112 CMP = CGF->Builder.CreateIntrinsic(
113 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
114 {FCompInst}, nullptr);
115 } else
116 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
117
118 if (CGF->CGM.getTarget().getTriple().isDXIL())
119 LastInstr = CGF->Builder.CreateIntrinsic(
120 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
121 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
122 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
123 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
124
125 CGF->Builder.CreateCondBr(CMP, LT0, End);
126
127 CGF->Builder.SetInsertPoint(LT0);
128
129 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
130 nullptr);
131
132 LastInstr = CGF->Builder.CreateBr(End);
133
134 CGF->Builder.SetInsertPoint(End);
135 } else {
136 llvm_unreachable("Backend Codegen not supported.");
137 }
138
139 return LastInstr;
140}
141
143 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
144 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
145 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
146
147 CallArgList Args;
148 LValue Op1TmpLValue =
149 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
150 LValue Op2TmpLValue =
151 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
152
154 Args.reverseWritebacks();
155
156 Value *LowBits = nullptr;
157 Value *HighBits = nullptr;
158
159 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
160
161 llvm::Type *RetElementTy = CGF->Int32Ty;
162 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
163 RetElementTy = llvm::VectorType::get(
164 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
165 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
166
167 CallInst *CI = CGF->Builder.CreateIntrinsic(
168 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
169
170 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
171 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
172
173 } else {
174 // For non-DXIL targets we generate the instructions directly.
175
176 if (!Op0->getType()->isVectorTy()) {
177 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
178 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
179
180 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
181 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
182 } else {
183 int NumElements = 1;
184 if (const auto *VecTy =
185 E->getArg(0)->getType()->getAs<clang::VectorType>())
186 NumElements = VecTy->getNumElements();
187
188 FixedVectorType *Uint32VecTy =
189 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
190 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
191 if (NumElements == 1) {
192 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
193 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
194 } else {
195 SmallVector<int> EvenMask, OddMask;
196 for (int I = 0, E = NumElements; I != E; ++I) {
197 EvenMask.push_back(I * 2);
198 OddMask.push_back(I * 2 + 1);
199 }
200 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
201 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
202 }
203 }
204 }
205 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
206 auto *LastInst =
207 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
208 CGF->EmitWritebacks(Args);
209 return LastInst;
210}
211
213 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
214 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
215 "asdouble operands types mismatch");
216 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
217 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
218
219 llvm::Type *ResultType = CGF.DoubleTy;
220 int N = 1;
221 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
222 N = VTy->getNumElements();
223 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
224 }
225
226 if (CGF.CGM.getTarget().getTriple().isDXIL())
227 return CGF.Builder.CreateIntrinsic(
228 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
229 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
230
231 if (!E->getArg(0)->getType()->isVectorType()) {
232 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
233 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
234 }
235
237 for (int i = 0; i < N; i++) {
238 Mask.push_back(i);
239 Mask.push_back(i + N);
240 }
241
242 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
243
244 return CGF.Builder.CreateBitCast(BitVec, ResultType);
245}
246
247/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
248/// return it as an i8 pointer.
250 LLVMContext &Context = CGF.CGM.getLLVMContext();
251 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
252 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
253 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
254 llvm::Function *F =
255 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
256 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
257 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
258}
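// For illustration, the helper above emits roughly:
//   %x18 = call i64 @llvm.read_register.i64(metadata !{!"x18"})
//   %ptr = inttoptr i64 %x18 to ptr
// and the X18 read/write/add/inc builtins then index off the returned
// pointer.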
259
260/// getBuiltinLibFunction - Given a builtin id for a function like
261/// "__builtin_fabsf", return a Function* for "fabsf".
263 unsigned BuiltinID) {
264 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
265
266 // Get the name, skip over the __builtin_ prefix (if necessary).
267 StringRef Name;
268 GlobalDecl D(FD);
269
270 // TODO: This list should be expanded or refactored after all GCC-compatible
271 // std libcall builtins are implemented.
272 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
273 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
274 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
275 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
276 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
277 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
278 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
279 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
280 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
281 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
282 {Builtin::BI__builtin_printf, "__printfieee128"},
283 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
284 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
285 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
286 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
287 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
288 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
289 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
290 {Builtin::BI__builtin_scanf, "__scanfieee128"},
291 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
292 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
293 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
294 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
295 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
296 };
297
298 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
299 // IBM 'long double' (i.e. __ibm128). Map them to the 'double' versions
300 // when 'long double' is in 64-bit mode.
301 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
302 {Builtin::BI__builtin_frexpl, "frexp"},
303 {Builtin::BI__builtin_ldexpl, "ldexp"},
304 {Builtin::BI__builtin_modfl, "modf"},
305 };
306
307 // If the builtin has been declared explicitly with an assembler label,
308 // use the mangled name. This differs from the plain label on platforms
309 // that prefix labels.
310 if (FD->hasAttr<AsmLabelAttr>())
311 Name = getMangledName(D);
312 else {
313 // TODO: This mapping should also be applied to targets other than PPC,
314 // once the backend supports IEEE 128-bit style libcalls.
315 if (getTriple().isPPC64() &&
316 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
317 F128Builtins.contains(BuiltinID))
318 Name = F128Builtins[BuiltinID];
319 else if (getTriple().isOSAIX() &&
320 &getTarget().getLongDoubleFormat() ==
321 &llvm::APFloat::IEEEdouble() &&
322 AIXLongDouble64Builtins.contains(BuiltinID))
323 Name = AIXLongDouble64Builtins[BuiltinID];
324 else
325 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
326 }
327
328 llvm::FunctionType *Ty =
329 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
330
331 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
332}
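// For illustration: on a powerpc64le target where 'long double' is the
// IEEE 128-bit format (e.g. with -mabi=ieeelongdouble), __builtin_printf
// resolves through the F128Builtins table above to "__printfieee128"
// rather than plain "printf".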
333
334/// Emit the conversions required to turn the given value into an
335/// integer of the given size.
336static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
337 QualType T, llvm::IntegerType *IntType) {
338 V = CGF.EmitToMemory(V, T);
339
340 if (V->getType()->isPointerTy())
341 return CGF.Builder.CreatePtrToInt(V, IntType);
342
343 assert(V->getType() == IntType);
344 return V;
345}
346
347static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
348 QualType T, llvm::Type *ResultType) {
349 V = CGF.EmitFromMemory(V, T);
350
351 if (ResultType->isPointerTy())
352 return CGF.Builder.CreateIntToPtr(V, ResultType);
353
354 assert(V->getType() == ResultType);
355 return V;
356}
357
358 static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
359 ASTContext &Ctx = CGF.getContext();
360 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
361 unsigned Bytes = Ptr.getElementType()->isPointerTy()
362 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
363 : Ptr.getElementType()->getScalarSizeInBits() / 8;
364 unsigned Align = Ptr.getAlignment().getQuantity();
365 if (Align % Bytes != 0) {
366 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
367 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
368 // Force address to be at least naturally-aligned.
369 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
370 }
371 return Ptr;
372}
373
374 /// Utility to insert an atomic instruction based on the AtomicRMW binop
375 /// kind and the expression node.
376 static Value *MakeBinaryAtomicValue(
377 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
378 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
379
380 QualType T = E->getType();
381 assert(E->getArg(0)->getType()->isPointerType());
382 assert(CGF.getContext().hasSameUnqualifiedType(T,
383 E->getArg(0)->getType()->getPointeeType()));
384 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
385
386 Address DestAddr = CheckAtomicAlignment(CGF, E);
387
388 llvm::IntegerType *IntType = llvm::IntegerType::get(
389 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
390
391 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
392 llvm::Type *ValueType = Val->getType();
393 Val = EmitToInt(CGF, Val, T, IntType);
394
395 llvm::Value *Result =
396 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
397 return EmitFromInt(CGF, Result, T, ValueType);
398}
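// For illustration, __sync_fetch_and_add(&x, 5) on an 'int' emits roughly
//   %old = atomicrmw add ptr %x, i32 5 seq_cst
// and returns the old value; pointer-typed operands are round-tripped
// through EmitToInt/EmitFromInt around the atomicrmw.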
399
401 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
402 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
403
404 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
405 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
406 LV.setNontemporal(true);
407 CGF.EmitStoreOfScalar(Val, LV, false);
408 return nullptr;
409}
410
412 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
413
414 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
415 LV.setNontemporal(true);
416 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
417}
418
420 llvm::AtomicRMWInst::BinOp Kind,
421 const CallExpr *E) {
422 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
423}
424
425 /// Utility to insert an atomic instruction based on the AtomicRMW binop
426 /// kind and the expression node, where the return value is the result of
427 /// the operation.
429 llvm::AtomicRMWInst::BinOp Kind,
430 const CallExpr *E,
431 Instruction::BinaryOps Op,
432 bool Invert = false) {
433 QualType T = E->getType();
434 assert(E->getArg(0)->getType()->isPointerType());
435 assert(CGF.getContext().hasSameUnqualifiedType(T,
436 E->getArg(0)->getType()->getPointeeType()));
437 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
438
439 Address DestAddr = CheckAtomicAlignment(CGF, E);
440
441 llvm::IntegerType *IntType = llvm::IntegerType::get(
442 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
443
444 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
445 llvm::Type *ValueType = Val->getType();
446 Val = EmitToInt(CGF, Val, T, IntType);
447
448 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
449 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
450 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
451 if (Invert)
452 Result =
453 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
454 llvm::ConstantInt::getAllOnesValue(IntType));
455 Result = EmitFromInt(CGF, Result, T, ValueType);
456 return RValue::get(Result);
457}
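// For illustration, __sync_add_and_fetch(&x, 5) emits an 'atomicrmw add'
// (which yields the old value) followed by a plain 'add' to recompute the
// new value; __sync_nand_and_fetch passes Invert=true so the recomputed
// 'and' result is bitwise-negated, matching GCC's NAND semantics.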
458
459/// Utility to insert an atomic cmpxchg instruction.
460///
461/// @param CGF The current codegen function.
462/// @param E Builtin call expression to convert to cmpxchg.
463/// arg0 - address to operate on
464/// arg1 - value to compare with
465/// arg2 - new value
466/// @param ReturnBool Specifies whether to return success flag of
467/// cmpxchg result or the old value.
468///
469/// @returns result of cmpxchg, according to ReturnBool
470///
471 /// Note: to lower Microsoft's _InterlockedCompareExchange* intrinsics,
472 /// invoke EmitAtomicCmpXchgForMSIntrin instead.
473 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
474 bool ReturnBool) {
475 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
476 Address DestAddr = CheckAtomicAlignment(CGF, E);
477
478 llvm::IntegerType *IntType = llvm::IntegerType::get(
479 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
480
481 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
482 llvm::Type *ValueType = Cmp->getType();
483 Cmp = EmitToInt(CGF, Cmp, T, IntType);
484 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
485
487 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
488 llvm::AtomicOrdering::SequentiallyConsistent);
489 if (ReturnBool)
490 // Extract boolean success flag and zext it to int.
491 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
492 CGF.ConvertType(E->getType()));
493 else
494 // Extract old value and emit it using the same type as compare value.
495 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
496 ValueType);
497}
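// For illustration, __sync_bool_compare_and_swap(&x, old, new) returns the
// cmpxchg success flag zero-extended to the result type, while
// __sync_val_compare_and_swap(&x, old, new) returns the value that was
// loaded from memory, converted back to the operand's original type.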
498
499/// This function should be invoked to emit atomic cmpxchg for Microsoft's
500/// _InterlockedCompareExchange* intrinsics which have the following signature:
501/// T _InterlockedCompareExchange(T volatile *Destination,
502/// T Exchange,
503/// T Comparand);
504///
505/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
506/// cmpxchg *Destination, Comparand, Exchange.
507/// So we need to swap Comparand and Exchange when invoking
508/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
509/// function MakeAtomicCmpXchgValue since it expects the arguments to be
510/// already swapped.
511
512 static
513 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
514 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
515 assert(E->getArg(0)->getType()->isPointerType());
516 assert(CGF.getContext().hasSameUnqualifiedType(
517 E->getType(), E->getArg(0)->getType()->getPointeeType()));
518 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
519 E->getArg(1)->getType()));
520 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
521 E->getArg(2)->getType()));
522
523 Address DestAddr = CheckAtomicAlignment(CGF, E);
524
525 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
526 auto *RTy = Exchange->getType();
527
528 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
529
530 if (RTy->isPointerTy()) {
531 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
532 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
533 }
534
535 // For Release ordering, the failure ordering should be Monotonic.
536 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
537 AtomicOrdering::Monotonic :
538 SuccessOrdering;
539
540 // The atomic instruction is marked volatile for consistency with MSVC. This
541 // blocks the few atomics optimizations that LLVM has. If we want to optimize
542 // _Interlocked* operations in the future, we will have to remove the volatile
543 // marker.
544 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
545 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
546 CmpXchg->setVolatile(true);
547
548 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
549 if (RTy->isPointerTy()) {
550 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
551 }
552
553 return Result;
554}
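// For illustration, _InterlockedCompareExchange(&x, Exchange, Comparand)
// emits roughly
//   cmpxchg volatile ptr %x, i32 %Comparand, i32 %Exchange seq_cst seq_cst
// (note the swapped operand order) and returns the original value taken
// from element 0 of the cmpxchg result pair.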
555
556// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
557// prototyped like this:
558//
559// unsigned char _InterlockedCompareExchange128...(
560// __int64 volatile * _Destination,
561// __int64 _ExchangeHigh,
562// __int64 _ExchangeLow,
563// __int64 * _ComparandResult);
564//
565// Note that Destination is assumed to be at least 16-byte aligned, despite
566// being typed int64.
567
569 const CallExpr *E,
570 AtomicOrdering SuccessOrdering) {
571 assert(E->getNumArgs() == 4);
572 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
573 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
574 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
575 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
576
577 assert(DestPtr->getType()->isPointerTy());
578 assert(!ExchangeHigh->getType()->isPointerTy());
579 assert(!ExchangeLow->getType()->isPointerTy());
580
581 // For Release ordering, the failure ordering should be Monotonic.
582 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
583 ? AtomicOrdering::Monotonic
584 : SuccessOrdering;
585
586 // Convert to i128 pointers and values. Alignment is also overridden for
587 // destination pointer.
588 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
589 Address DestAddr(DestPtr, Int128Ty,
591 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
592
593 // (((i128)hi) << 64) | ((i128)lo)
594 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
595 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
596 ExchangeHigh =
597 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
598 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
599
600 // Load the comparand for the instruction.
601 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
602
603 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
604 SuccessOrdering, FailureOrdering);
605
606 // The atomic instruction is marked volatile for consistency with MSVC. This
607 // blocks the few atomics optimizations that LLVM has. If we want to optimize
608 // _Interlocked* operations in the future, we will have to remove the volatile
609 // marker.
610 CXI->setVolatile(true);
611
612 // Store the result as an outparameter.
613 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
614 ComparandAddr);
615
616 // Get the success boolean and zero extend it to i8.
617 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
618 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
619}
620
622 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
623 assert(E->getArg(0)->getType()->isPointerType());
624
625 auto *IntTy = CGF.ConvertType(E->getType());
626 Address DestAddr = CheckAtomicAlignment(CGF, E);
627 auto *Result = CGF.Builder.CreateAtomicRMW(
628 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
629 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
630}
631
633 CodeGenFunction &CGF, const CallExpr *E,
634 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
635 assert(E->getArg(0)->getType()->isPointerType());
636
637 auto *IntTy = CGF.ConvertType(E->getType());
638 Address DestAddr = CheckAtomicAlignment(CGF, E);
639 auto *Result = CGF.Builder.CreateAtomicRMW(
640 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
641 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
642}
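// For illustration, _InterlockedIncrement(&x) emits 'atomicrmw add ..., 1'
// and then adds 1 to the returned old value (and _InterlockedDecrement
// subtracts 1), because the MSVC intrinsics return the new value rather
// than the old one.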
643
644// Build a plain volatile load.
646 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
647 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
648 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
649 llvm::Type *ITy =
650 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
651 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
652 Load->setVolatile(true);
653 return Load;
654}
655
656// Build a plain volatile store.
658 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
659 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
660 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
661 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
662 llvm::StoreInst *Store =
663 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
664 Store->setVolatile(true);
665 return Store;
666}
667
668// Emit a simple mangled intrinsic that has 1 argument and a return type
669// matching the argument type. Depending on mode, this may be a constrained
670// floating-point intrinsic.
672 const CallExpr *E, unsigned IntrinsicID,
673 unsigned ConstrainedIntrinsicID) {
674 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
675
676 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
677 if (CGF.Builder.getIsFPConstrained()) {
678 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
679 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
680 } else {
681 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
682 return CGF.Builder.CreateCall(F, Src0);
683 }
684}
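// For illustration, __builtin_sqrt(x) normally lowers to a call to
// @llvm.sqrt.f64; under strict floating-point semantics (e.g.
// "#pragma STDC FENV_ACCESS ON" or -ffp-exception-behavior=strict) the
// builder is FP-constrained and @llvm.experimental.constrained.sqrt.f64 is
// emitted instead.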
685
686// Emit an intrinsic that has 2 operands of the same type as its result.
687// Depending on mode, this may be a constrained floating-point intrinsic.
689 const CallExpr *E, unsigned IntrinsicID,
690 unsigned ConstrainedIntrinsicID) {
691 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
692 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
693
694 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
695 if (CGF.Builder.getIsFPConstrained()) {
696 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
697 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
698 } else {
699 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
700 return CGF.Builder.CreateCall(F, { Src0, Src1 });
701 }
702}
703
704 // Emit an intrinsic whose name is also mangled on its second argument's type.
706 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
707 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
708 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
709 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
710
711 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
712 if (CGF.Builder.getIsFPConstrained()) {
713 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
714 {Src0->getType(), Src1->getType()});
715 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
716 }
717
718 Function *F =
719 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
720 return CGF.Builder.CreateCall(F, {Src0, Src1});
721}
722
723// Emit an intrinsic that has 3 operands of the same type as its result.
724// Depending on mode, this may be a constrained floating-point intrinsic.
726 const CallExpr *E, unsigned IntrinsicID,
727 unsigned ConstrainedIntrinsicID) {
728 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
729 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
730 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
731
732 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
733 if (CGF.Builder.getIsFPConstrained()) {
734 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
735 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
736 } else {
737 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
738 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
739 }
740}
741
742// Emit an intrinsic where all operands are of the same type as the result.
743// Depending on mode, this may be a constrained floating-point intrinsic.
745 unsigned IntrinsicID,
746 unsigned ConstrainedIntrinsicID,
747 llvm::Type *Ty,
748 ArrayRef<Value *> Args) {
749 Function *F;
750 if (CGF.Builder.getIsFPConstrained())
751 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
752 else
753 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
754
755 if (CGF.Builder.getIsFPConstrained())
756 return CGF.Builder.CreateConstrainedFPCall(F, Args);
757 else
758 return CGF.Builder.CreateCall(F, Args);
759}
760
761// Emit a simple intrinsic that has N scalar arguments and a return type
762// matching the argument type. It is assumed that only the first argument is
763// overloaded.
764template <unsigned N>
766 const CallExpr *E,
767 unsigned IntrinsicID,
768 llvm::StringRef Name = "") {
769 static_assert(N, "expect non-empty argument");
770 SmallVector<Value *, N> Args;
771 for (unsigned I = 0; I < N; ++I)
772 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
773 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
774 return CGF.Builder.CreateCall(F, Args, Name);
775}
776
777// Emit an intrinsic that has 1 float or double operand, and 1 integer.
779 const CallExpr *E,
780 unsigned IntrinsicID) {
781 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
782 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
783
784 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
785 return CGF.Builder.CreateCall(F, {Src0, Src1});
786}
787
788// Emit an intrinsic that has overloaded integer result and fp operand.
789static Value *
791 unsigned IntrinsicID,
792 unsigned ConstrainedIntrinsicID) {
793 llvm::Type *ResultType = CGF.ConvertType(E->getType());
794 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
795
796 if (CGF.Builder.getIsFPConstrained()) {
797 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
798 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
799 {ResultType, Src0->getType()});
800 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
801 } else {
802 Function *F =
803 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
804 return CGF.Builder.CreateCall(F, Src0);
805 }
806}
807
809 llvm::Intrinsic::ID IntrinsicID) {
810 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
811 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
812
813 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
814 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
815 llvm::Function *F =
816 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
817 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
818
819 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
820 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
821 CGF.EmitStoreOfScalar(Exp, LV);
822
823 return CGF.Builder.CreateExtractValue(Call, 0);
824}
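// For illustration, __builtin_frexp(x, &e) lowers to a call to
// @llvm.frexp.f64.i32 returning a {double, i32} pair: element 1 (the
// exponent) is stored through the second argument and element 0 (the
// fraction) becomes the builtin's result.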
825
827 llvm::Intrinsic::ID IntrinsicID) {
828 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
829 llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
830 llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
831
832 llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
833 llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
834
835 llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
836 llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
837
838 QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
839 LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
840 LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
841
842 llvm::StoreInst *StoreSin =
843 CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
844 llvm::StoreInst *StoreCos =
845 CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
846
847 // Mark the two stores as non-aliasing with each other. The order of stores
848 // emitted by this builtin is arbitrary, enforcing a particular order will
849 // prevent optimizations later on.
850 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
851 MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
852 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
853 MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
854 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
855 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
856}
857
858/// EmitFAbs - Emit a call to @llvm.fabs().
860 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
861 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
862 Call->setDoesNotAccessMemory();
863 return Call;
864}
865
866/// Emit the computation of the sign bit for a floating point value. Returns
867/// the i1 sign bit value.
869 LLVMContext &C = CGF.CGM.getLLVMContext();
870
871 llvm::Type *Ty = V->getType();
872 int Width = Ty->getPrimitiveSizeInBits();
873 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
874 V = CGF.Builder.CreateBitCast(V, IntTy);
875 if (Ty->isPPC_FP128Ty()) {
876 // We want the sign bit of the higher-order double. The bitcast we just
877 // did works as if the double-double was stored to memory and then
878 // read as an i128. The "store" will put the higher-order double in the
879 // lower address in both little- and big-Endian modes, but the "load"
880 // will treat those bits as a different part of the i128: the low bits in
881 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
882 // we need to shift the high bits down to the low before truncating.
883 Width >>= 1;
884 if (CGF.getTarget().isBigEndian()) {
885 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
886 V = CGF.Builder.CreateLShr(V, ShiftCst);
887 }
888 // We are truncating the value in order to extract the higher-order
889 // double, from which we will extract the sign.
890 IntTy = llvm::IntegerType::get(C, Width);
891 V = CGF.Builder.CreateTrunc(V, IntTy);
892 }
893 Value *Zero = llvm::Constant::getNullValue(IntTy);
894 return CGF.Builder.CreateICmpSLT(V, Zero);
895}
896
897 /// Checks that no arguments or results are passed indirectly in the ABI
898 /// (i.e. via a hidden pointer). This is used to check that annotating FP
899 /// libcalls (which could set `errno`) with "int" TBAA metadata is safe. If
900 /// any floating-point arguments are passed indirectly, the setup for the
901 /// call could be incorrectly optimized out.
903 auto IsIndirect = [&](ABIArgInfo const &info) {
904 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
905 };
906 return !IsIndirect(FnInfo.getReturnInfo()) &&
907 llvm::none_of(FnInfo.arguments(),
908 [&](CGFunctionInfoArgInfo const &ArgInfo) {
909 return IsIndirect(ArgInfo.info);
910 });
911}
912
914 const CallExpr *E, llvm::Constant *calleeValue) {
915 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
916 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
917 llvm::CallBase *callOrInvoke = nullptr;
918 CGFunctionInfo const *FnInfo = nullptr;
919 RValue Call =
920 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
921 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
922
923 if (unsigned BuiltinID = FD->getBuiltinID()) {
924 // Check whether a FP math builtin function, such as BI__builtin_expf
925 ASTContext &Context = CGF.getContext();
926 bool ConstWithoutErrnoAndExceptions =
928 // Restrict to targets with errno; for example, macOS doesn't set errno.
929 // TODO: Support builtin functions with complex return types, e.g. cacosh.
930 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
931 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
933 // Emit "int" TBAA metadata on FP math libcalls.
934 clang::QualType IntTy = Context.IntTy;
935 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
936 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
937 }
938 }
939 return Call;
940}
941
942/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
943/// depending on IntrinsicID.
944///
945/// \arg CGF The current codegen function.
946/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
947/// \arg X The first argument to the llvm.*.with.overflow.*.
948/// \arg Y The second argument to the llvm.*.with.overflow.*.
949/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
950/// \returns The result (i.e. sum/product) returned by the intrinsic.
951static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
952 const llvm::Intrinsic::ID IntrinsicID,
953 llvm::Value *X, llvm::Value *Y,
954 llvm::Value *&Carry) {
955 // Make sure we have integers of the same width.
956 assert(X->getType() == Y->getType() &&
957 "Arguments must be the same type. (Did you forget to make sure both "
958 "arguments have the same integer width?)");
959
960 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
961 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
962 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
963 return CGF.Builder.CreateExtractValue(Tmp, 0);
964}
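// For illustration, __builtin_sadd_overflow(a, b, &res) with 'int'
// operands calls @llvm.sadd.with.overflow.i32; element 0 of the returned
// pair is stored to *res and element 1 (the overflow flag) becomes the
// builtin's boolean result.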
965
966static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
967 int low, int high) {
968 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
969 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
970 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
971 Call->addRangeRetAttr(CR);
972 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
973 return Call;
974}
975
976namespace {
977 struct WidthAndSignedness {
978 unsigned Width;
979 bool Signed;
980 };
981}
982
983static WidthAndSignedness
985 const clang::QualType Type) {
986 assert(Type->isIntegerType() && "Given type is not an integer.");
987 unsigned Width = context.getIntWidth(Type);
989 return {Width, Signed};
990}
991
992// Given one or more integer types, this function produces an integer type that
993// encompasses them: any value in one of the given types could be expressed in
994// the encompassing type.
995static struct WidthAndSignedness
996EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
997 assert(Types.size() > 0 && "Empty list of types.");
998
999 // If any of the given types is signed, we must return a signed type.
1000 bool Signed = false;
1001 for (const auto &Type : Types) {
1002 Signed |= Type.Signed;
1003 }
1004
1005 // The encompassing type must have a width greater than or equal to the width
1006 // of the specified types. Additionally, if the encompassing type is signed,
1007 // its width must be strictly greater than the width of any unsigned types
1008 // given.
1009 unsigned Width = 0;
1010 for (const auto &Type : Types) {
1011 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
1012 if (Width < MinWidth) {
1013 Width = MinWidth;
1014 }
1015 }
1016
1017 return {Width, Signed};
1018}
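// For illustration, encompassing {unsigned 32-bit, signed 32-bit} yields a
// signed 33-bit type: the result must be signed because one input is
// signed, and it needs 33 bits so that every unsigned 32-bit value still
// fits. This is how the generic __builtin_*_overflow builtins pick the
// type in which to perform the arithmetic.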
1019
1020Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1021 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1022 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1023 ArgValue);
1024}
1025
1026/// Checks if using the result of __builtin_object_size(p, @p From) in place of
1027/// __builtin_object_size(p, @p To) is correct
1028static bool areBOSTypesCompatible(int From, int To) {
1029 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1030 // Type=2 identically. Encoding this implementation detail here may make
1031 // improving __builtin_object_size difficult in the future, so it's omitted.
1032 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1033}
1034
1035static llvm::Value *
1036getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1037 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1038}
1039
1040llvm::Value *
1041CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1042 llvm::IntegerType *ResType,
1043 llvm::Value *EmittedE,
1044 bool IsDynamic) {
1045 uint64_t ObjectSize;
1046 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1047 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1048 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1049}
1050
1052 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1053 uint64_t &Offset) {
1054 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1055 getLangOpts().getStrictFlexArraysLevel();
1056 uint32_t FieldNo = 0;
1057
1058 if (RD->isImplicit())
1059 return nullptr;
1060
1061 for (const FieldDecl *FD : RD->fields()) {
1062 if ((!FAMDecl || FD == FAMDecl) &&
1064 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1065 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1066 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1067 Offset += Layout.getFieldOffset(FieldNo);
1068 return FD;
1069 }
1070
1071 QualType Ty = FD->getType();
1072 if (Ty->isRecordType()) {
1074 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1075 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1076 Offset += Layout.getFieldOffset(FieldNo);
1077 return Field;
1078 }
1079 }
1080
1081 if (!RD->isUnion())
1082 ++FieldNo;
1083 }
1084
1085 return nullptr;
1086}
1087
1088static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1089 unsigned Num = 0;
1090
1091 for (const FieldDecl *FD : RD->fields()) {
1092 if (FD->getType()->isCountAttributedType())
1093 return ++Num;
1094
1095 QualType Ty = FD->getType();
1096 if (Ty->isRecordType())
1098 }
1099
1100 return Num;
1101}
1102
1103llvm::Value *
1104CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1105 llvm::IntegerType *ResType) {
1106 // The code generated here calculates the size of a struct with a flexible
1107 // array member that uses the counted_by attribute. There are two instances
1108 // we handle:
1109 //
1110 // struct s {
1111 // unsigned long flags;
1112 // int count;
1113 // int array[] __attribute__((counted_by(count)));
1114 // }
1115 //
1116 // 1) bdos of the flexible array itself:
1117 //
1118 // __builtin_dynamic_object_size(p->array, 1) ==
1119 // p->count * sizeof(*p->array)
1120 //
1121 // 2) bdos of a pointer into the flexible array:
1122 //
1123 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1124 // (p->count - 42) * sizeof(*p->array)
1125 //
1126 // 3) bdos of the whole struct, including the flexible array:
1127 //
1128 // __builtin_dynamic_object_size(p, 1) ==
1129 // max(sizeof(struct s),
1130 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1131 //
1132 ASTContext &Ctx = getContext();
1133 const Expr *Base = E->IgnoreParenImpCasts();
1134 const Expr *Idx = nullptr;
1135
1136 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1137 UO && UO->getOpcode() == UO_AddrOf) {
1138 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1139 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1140 Base = ASE->getBase()->IgnoreParenImpCasts();
1141 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1142
1143 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1144 int64_t Val = IL->getValue().getSExtValue();
1145 if (Val < 0)
1146 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1147
1148 if (Val == 0)
1149 // The index is 0, so we don't need to take it into account.
1150 Idx = nullptr;
1151 }
1152 } else {
1153 // Potential pointer to another element in the struct.
1154 Base = SubExpr;
1155 }
1156 }
1157
1158 // Get the flexible array member Decl.
1159 const RecordDecl *OuterRD = nullptr;
1160 const FieldDecl *FAMDecl = nullptr;
1161 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1162 // Check if \p Base is referencing the FAM itself.
1163 const ValueDecl *VD = ME->getMemberDecl();
1165 FAMDecl = dyn_cast<FieldDecl>(VD);
1166 if (!FAMDecl)
1167 return nullptr;
1168 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1169 // Check if we're pointing to the whole struct.
1170 QualType Ty = DRE->getDecl()->getType();
1171 if (Ty->isPointerType())
1172 Ty = Ty->getPointeeType();
1173 OuterRD = Ty->getAsRecordDecl();
1174
1175 // If we have a situation like this:
1176 //
1177 // struct union_of_fams {
1178 // int flags;
1179 // union {
1180 // signed char normal_field;
1181 // struct {
1182 // int count1;
1183 // int arr1[] __counted_by(count1);
1184 // };
1185 // struct {
1186 // signed char count2;
1187 // int arr2[] __counted_by(count2);
1188 // };
1189 // };
1190 // };
1191 //
1192 // We don't know which 'count' to use in this scenario:
1193 //
1194 // size_t get_size(struct union_of_fams *p) {
1195 // return __builtin_dynamic_object_size(p, 1);
1196 // }
1197 //
1198 // Instead of calculating a wrong number, we give up.
1199 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1200 return nullptr;
1201 }
1202
1203 if (!OuterRD)
1204 return nullptr;
1205
1206 // We call FindFlexibleArrayMemberFieldAndOffset even if FAMDecl is
1207 // non-null to get its offset.
1208 uint64_t Offset = 0;
1209 FAMDecl =
1210 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1211 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1212
1213 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1214 // No flexible array member found or it doesn't have the "counted_by"
1215 // attribute.
1216 return nullptr;
1217
1218 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1219 if (!CountedByFD)
1220 // Can't find the field referenced by the "counted_by" attribute.
1221 return nullptr;
1222
1223 if (isa<DeclRefExpr>(Base))
1224 // The whole struct is specified in the __bdos. The calculation of the
1225 // whole size of the structure can be done in two ways:
1226 //
1227 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1228 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1229 //
1230 // The first will add additional padding after the end of the array
1231 // allocation, while the second method is more precise but not quite what
1232 // programmers expect.
1233 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1234 // discussion of the topic.
1235 //
1236 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1237 // structure. Therefore, because of the above issue, we'll choose to match
1238 // what GCC does for consistency's sake.
1239 return nullptr;
1240
1241 // Build a load of the counted_by field.
1242 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1243 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1244 if (!CountedByInst)
1245 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1246
1247 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1248
1249 // Build a load of the index and subtract it from the count.
1250 Value *IdxInst = nullptr;
1251 if (Idx) {
1252 if (Idx->HasSideEffects(getContext()))
1253 // We can't have side-effects.
1254 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1255
1256 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1257 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1258 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1259
1260 // We go ahead with the calculation here. If the index turns out to be
1261 // negative, we'll catch it at the end.
1262 CountedByInst =
1263 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1264 }
1265
1266 // Calculate how large the flexible array member is in bytes.
1267 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1269 llvm::Constant *ElemSize =
1270 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1271 Value *Res =
1272 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1273 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1274
1275 // A negative \p IdxInst or \p CountedByInst means that the index lands
1276 // outside of the flexible array member. If that's the case, we want to
1277 // return 0.
1278 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1279 if (IdxInst)
1280 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1281
1282 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1283}
1284
1285/// Returns a Value corresponding to the size of the given expression.
1286/// This Value may be either of the following:
1287/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1288/// it)
1289/// - A call to the @llvm.objectsize intrinsic
1290///
1291/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1292/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1293/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1294llvm::Value *
1295CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1296 llvm::IntegerType *ResType,
1297 llvm::Value *EmittedE, bool IsDynamic) {
1298 // We need to reference an argument if the pointer is a parameter with the
1299 // pass_object_size attribute.
1300 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1301 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1302 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1303 if (Param != nullptr && PS != nullptr &&
1304 areBOSTypesCompatible(PS->getType(), Type)) {
1305 auto Iter = SizeArguments.find(Param);
1306 assert(Iter != SizeArguments.end());
1307
1308 const ImplicitParamDecl *D = Iter->second;
1309 auto DIter = LocalDeclMap.find(D);
1310 assert(DIter != LocalDeclMap.end());
1311
1312 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1313 getContext().getSizeType(), E->getBeginLoc());
1314 }
1315 }
1316
1317 if (IsDynamic) {
1318 // Emit special code for a flexible array member with the "counted_by"
1319 // attribute.
1320 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1321 return V;
1322 }
1323
1324 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1325 // evaluate E for side-effects. In either case, we shouldn't lower to
1326 // @llvm.objectsize.
1327 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1328 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1329
1330 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1331 assert(Ptr->getType()->isPointerTy() &&
1332 "Non-pointer passed to __builtin_object_size?");
1333
1334 Function *F =
1335 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1336
1337 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1338 Value *Min = Builder.getInt1((Type & 2) != 0);
1339 // For GCC compatibility, __builtin_object_size treats NULL as having an unknown size.
1340 Value *NullIsUnknown = Builder.getTrue();
1341 Value *Dynamic = Builder.getInt1(IsDynamic);
1342 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1343}
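// For illustration, a non-constant __builtin_object_size(p, 2) ends up as
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)
// where the i1 flags are, in order: min (Type & 2), null-is-unknown-size,
// and dynamic.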
1344
1345namespace {
1346/// A struct to generically describe a bit test intrinsic.
1347struct BitTest {
1348 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1349 enum InterlockingKind : uint8_t {
1350 Unlocked,
1351 Sequential,
1352 Acquire,
1353 Release,
1354 NoFence
1355 };
1356
1357 ActionKind Action;
1358 InterlockingKind Interlocking;
1359 bool Is64Bit;
1360
1361 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1362};
1363
1364} // namespace
1365
1366BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1367 switch (BuiltinID) {
1368 // Main portable variants.
1369 case Builtin::BI_bittest:
1370 return {TestOnly, Unlocked, false};
1371 case Builtin::BI_bittestandcomplement:
1372 return {Complement, Unlocked, false};
1373 case Builtin::BI_bittestandreset:
1374 return {Reset, Unlocked, false};
1375 case Builtin::BI_bittestandset:
1376 return {Set, Unlocked, false};
1377 case Builtin::BI_interlockedbittestandreset:
1378 return {Reset, Sequential, false};
1379 case Builtin::BI_interlockedbittestandset:
1380 return {Set, Sequential, false};
1381
1382 // X86-specific 64-bit variants.
1383 case Builtin::BI_bittest64:
1384 return {TestOnly, Unlocked, true};
1385 case Builtin::BI_bittestandcomplement64:
1386 return {Complement, Unlocked, true};
1387 case Builtin::BI_bittestandreset64:
1388 return {Reset, Unlocked, true};
1389 case Builtin::BI_bittestandset64:
1390 return {Set, Unlocked, true};
1391 case Builtin::BI_interlockedbittestandreset64:
1392 return {Reset, Sequential, true};
1393 case Builtin::BI_interlockedbittestandset64:
1394 return {Set, Sequential, true};
1395
1396 // ARM/AArch64-specific ordering variants.
1397 case Builtin::BI_interlockedbittestandset_acq:
1398 return {Set, Acquire, false};
1399 case Builtin::BI_interlockedbittestandset_rel:
1400 return {Set, Release, false};
1401 case Builtin::BI_interlockedbittestandset_nf:
1402 return {Set, NoFence, false};
1403 case Builtin::BI_interlockedbittestandreset_acq:
1404 return {Reset, Acquire, false};
1405 case Builtin::BI_interlockedbittestandreset_rel:
1406 return {Reset, Release, false};
1407 case Builtin::BI_interlockedbittestandreset_nf:
1408 return {Reset, NoFence, false};
1409 }
1410 llvm_unreachable("expected only bittest intrinsics");
1411}
1412
1413static char bitActionToX86BTCode(BitTest::ActionKind A) {
1414 switch (A) {
1415 case BitTest::TestOnly: return '\0';
1416 case BitTest::Complement: return 'c';
1417 case BitTest::Reset: return 'r';
1418 case BitTest::Set: return 's';
1419 }
1420 llvm_unreachable("invalid action");
1421}
1422
1424 BitTest BT,
1425 const CallExpr *E, Value *BitBase,
1426 Value *BitPos) {
1427 char Action = bitActionToX86BTCode(BT.Action);
1428 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1429
1430 // Build the assembly.
1432 raw_svector_ostream AsmOS(Asm);
1433 if (BT.Interlocking != BitTest::Unlocked)
1434 AsmOS << "lock ";
1435 AsmOS << "bt";
1436 if (Action)
1437 AsmOS << Action;
1438 AsmOS << SizeSuffix << " $2, ($1)";
1439
1440 // Build the constraints. FIXME: We should support immediates when possible.
1441 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1442 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1443 if (!MachineClobbers.empty()) {
1444 Constraints += ',';
1445 Constraints += MachineClobbers;
1446 }
1447 llvm::IntegerType *IntType = llvm::IntegerType::get(
1448 CGF.getLLVMContext(),
1449 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1450 llvm::FunctionType *FTy =
1451 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1452
1453 llvm::InlineAsm *IA =
1454 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1455 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1456}
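// For illustration, _interlockedbittestandset(p, n) on x86 produces the
// inline asm "lock btsl $2, ($1)" with constraints
// "={@ccc},r,r,~{cc},~{memory}", so the carry flag (the bit's previous
// value) is returned directly as an i8.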
1457
1458static llvm::AtomicOrdering
1459getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1460 switch (I) {
1461 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1462 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1463 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1464 case BitTest::Release: return llvm::AtomicOrdering::Release;
1465 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1466 }
1467 llvm_unreachable("invalid interlocking");
1468}
1469
1470/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1471/// bits and a bit position and read and optionally modify the bit at that
1472/// position. The position index can be arbitrarily large, i.e. it can be larger
1473/// than 31 or 63, so we need an indexed load in the general case.
1474static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1475 unsigned BuiltinID,
1476 const CallExpr *E) {
1477 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1478 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1479
1480 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1481
1482 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1483 // indexing operation internally. Use them if possible.
1484 if (CGF.getTarget().getTriple().isX86())
1485 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1486
1487 // Otherwise, use generic code to load one byte and test the bit. Use all but
1488 // the bottom three bits as the array index, and the bottom three bits to form
1489 // a mask.
1490 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1491 Value *ByteIndex = CGF.Builder.CreateAShr(
1492 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1493 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1494 "bittest.byteaddr"),
1495 CGF.Int8Ty, CharUnits::One());
1496 Value *PosLow =
1497 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1498 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1499
1500 // The updating instructions will need a mask.
1501 Value *Mask = nullptr;
1502 if (BT.Action != BitTest::TestOnly) {
1503 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1504 "bittest.mask");
1505 }
1506
1507 // Check the action and ordering of the interlocked intrinsics.
1508 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1509
1510 Value *OldByte = nullptr;
1511 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1512 // Emit a combined atomicrmw load/store operation for the interlocked
1513 // intrinsics.
1514 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1515 if (BT.Action == BitTest::Reset) {
1516 Mask = CGF.Builder.CreateNot(Mask);
1517 RMWOp = llvm::AtomicRMWInst::And;
1518 }
1519 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1520 } else {
1521 // Emit a plain load for the non-interlocked intrinsics.
1522 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1523 Value *NewByte = nullptr;
1524 switch (BT.Action) {
1525 case BitTest::TestOnly:
1526 // Don't store anything.
1527 break;
1528 case BitTest::Complement:
1529 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1530 break;
1531 case BitTest::Reset:
1532 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1533 break;
1534 case BitTest::Set:
1535 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1536 break;
1537 }
1538 if (NewByte)
1539 CGF.Builder.CreateStore(NewByte, ByteAddr);
1540 }
1541
1542 // However we loaded the old byte, either by plain load or atomicrmw, shift
1543 // the bit into the low position and mask it to 0 or 1.
1544 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1545 return CGF.Builder.CreateAnd(
1546 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1547}
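// Illustrative sketch (hypothetical helper, not part of this file): the
// generic test-only lowering above is equivalent to this portable C++
// computation.
static unsigned char genericBitTestReference(const unsigned char *BitBase,
                                             long BitPos) {
  unsigned char Byte = BitBase[BitPos >> 3];    // indexed byte load
  unsigned char PosLow = BitPos & 0x7;          // bit offset within that byte
  return (Byte >> PosLow) & 1;                  // shift down and mask to 0/1
}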
1548
1550 unsigned BuiltinID,
1551 const CallExpr *E) {
1552 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1553
1555 raw_svector_ostream AsmOS(Asm);
1556 llvm::IntegerType *RetType = CGF.Int32Ty;
1557
1558 switch (BuiltinID) {
1559 case clang::PPC::BI__builtin_ppc_ldarx:
1560 AsmOS << "ldarx ";
1561 RetType = CGF.Int64Ty;
1562 break;
1563 case clang::PPC::BI__builtin_ppc_lwarx:
1564 AsmOS << "lwarx ";
1565 RetType = CGF.Int32Ty;
1566 break;
1567 case clang::PPC::BI__builtin_ppc_lharx:
1568 AsmOS << "lharx ";
1569 RetType = CGF.Int16Ty;
1570 break;
1571 case clang::PPC::BI__builtin_ppc_lbarx:
1572 AsmOS << "lbarx ";
1573 RetType = CGF.Int8Ty;
1574 break;
1575 default:
1576 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1577 }
1578
1579 AsmOS << "$0, ${1:y}";
1580
1581 std::string Constraints = "=r,*Z,~{memory}";
1582 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1583 if (!MachineClobbers.empty()) {
1584 Constraints += ',';
1585 Constraints += MachineClobbers;
1586 }
1587
1588 llvm::Type *PtrType = CGF.UnqualPtrTy;
1589 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1590
1591 llvm::InlineAsm *IA =
1592 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1593 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1594 CI->addParamAttr(
1595 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1596 return CI;
1597}
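// Illustrative note (not part of this file): for __builtin_ppc_lwarx the code
// above emits the inline-asm template "lwarx $0, ${1:y}" with the constraints
// "=r,*Z,~{memory}" and an i32 result; the other three builtins differ only
// in the opcode and in the width of the returned integer, and the elementtype
// attribute added to the pointer operand records that width for the backend.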
1598
1599namespace {
1600enum class MSVCSetJmpKind {
1601 _setjmpex,
1602 _setjmp3,
1603 _setjmp
1604};
1605}
1606
1607/// MSVC handles setjmp a bit differently on different platforms. On every
1608 /// architecture except 32-bit x86, the frame address is passed. On 32-bit x86,
1609 /// extra parameters can be passed as variadic arguments, but we always pass none.
1610static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1611 const CallExpr *E) {
1612 llvm::Value *Arg1 = nullptr;
1613 llvm::Type *Arg1Ty = nullptr;
1614 StringRef Name;
1615 bool IsVarArg = false;
1616 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1617 Name = "_setjmp3";
1618 Arg1Ty = CGF.Int32Ty;
1619 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1620 IsVarArg = true;
1621 } else {
1622 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1623 Arg1Ty = CGF.Int8PtrTy;
1624 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1625 Arg1 = CGF.Builder.CreateCall(
1626 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1627 } else
1628 Arg1 = CGF.Builder.CreateCall(
1629 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1630 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1631 }
1632
1633 // Mark the call site and declaration with ReturnsTwice.
1634 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1635 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1636 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1637 llvm::Attribute::ReturnsTwice);
1638 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1639 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1640 ReturnsTwiceAttr, /*Local=*/true);
1641
1642 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1643 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1644 llvm::Value *Args[] = {Buf, Arg1};
1645 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1646 CB->setAttributes(ReturnsTwiceAttr);
1647 return RValue::get(CB);
1648}
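// Illustrative note (an assumption about the resulting calls, not part of
// this file): the setjmp family built above has roughly these shapes, with
// 'buf' being the user's jump buffer cast to i8*:
//   _setjmp3(buf, 0, ...)                          32-bit x86, variadic, no extras
//   _setjmpex(buf, @llvm.sponentry())              AArch64
//   _setjmp/_setjmpex(buf, @llvm.frameaddress(0))  other architectures
// Both the runtime declaration and each call site carry 'returns_twice'.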
1649
1650 // Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
1651 // repeating code, we handle them here.
1692 __fastfail,
1693};
1694
1695static std::optional<CodeGenFunction::MSVCIntrin>
1696translateArmToMsvcIntrin(unsigned BuiltinID) {
1697 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1698 switch (BuiltinID) {
1699 default:
1700 return std::nullopt;
1701 case clang::ARM::BI_BitScanForward:
1702 case clang::ARM::BI_BitScanForward64:
1703 return MSVCIntrin::_BitScanForward;
1704 case clang::ARM::BI_BitScanReverse:
1705 case clang::ARM::BI_BitScanReverse64:
1706 return MSVCIntrin::_BitScanReverse;
1707 case clang::ARM::BI_InterlockedAnd64:
1708 return MSVCIntrin::_InterlockedAnd;
1709 case clang::ARM::BI_InterlockedExchange64:
1710 return MSVCIntrin::_InterlockedExchange;
1711 case clang::ARM::BI_InterlockedExchangeAdd64:
1712 return MSVCIntrin::_InterlockedExchangeAdd;
1713 case clang::ARM::BI_InterlockedExchangeSub64:
1714 return MSVCIntrin::_InterlockedExchangeSub;
1715 case clang::ARM::BI_InterlockedOr64:
1716 return MSVCIntrin::_InterlockedOr;
1717 case clang::ARM::BI_InterlockedXor64:
1718 return MSVCIntrin::_InterlockedXor;
1719 case clang::ARM::BI_InterlockedDecrement64:
1720 return MSVCIntrin::_InterlockedDecrement;
1721 case clang::ARM::BI_InterlockedIncrement64:
1722 return MSVCIntrin::_InterlockedIncrement;
1723 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1724 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1725 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1726 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1727 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1728 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1729 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1730 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1731 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1732 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1733 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1734 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1735 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1736 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1737 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1738 case clang::ARM::BI_InterlockedExchange8_acq:
1739 case clang::ARM::BI_InterlockedExchange16_acq:
1740 case clang::ARM::BI_InterlockedExchange_acq:
1741 case clang::ARM::BI_InterlockedExchange64_acq:
1742 case clang::ARM::BI_InterlockedExchangePointer_acq:
1743 return MSVCIntrin::_InterlockedExchange_acq;
1744 case clang::ARM::BI_InterlockedExchange8_rel:
1745 case clang::ARM::BI_InterlockedExchange16_rel:
1746 case clang::ARM::BI_InterlockedExchange_rel:
1747 case clang::ARM::BI_InterlockedExchange64_rel:
1748 case clang::ARM::BI_InterlockedExchangePointer_rel:
1749 return MSVCIntrin::_InterlockedExchange_rel;
1750 case clang::ARM::BI_InterlockedExchange8_nf:
1751 case clang::ARM::BI_InterlockedExchange16_nf:
1752 case clang::ARM::BI_InterlockedExchange_nf:
1753 case clang::ARM::BI_InterlockedExchange64_nf:
1754 case clang::ARM::BI_InterlockedExchangePointer_nf:
1755 return MSVCIntrin::_InterlockedExchange_nf;
1756 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1757 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1758 case clang::ARM::BI_InterlockedCompareExchange_acq:
1759 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1760 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1761 return MSVCIntrin::_InterlockedCompareExchange_acq;
1762 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1763 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1764 case clang::ARM::BI_InterlockedCompareExchange_rel:
1765 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1766 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1767 return MSVCIntrin::_InterlockedCompareExchange_rel;
1768 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1769 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1770 case clang::ARM::BI_InterlockedCompareExchange_nf:
1771 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1772 return MSVCIntrin::_InterlockedCompareExchange_nf;
1773 case clang::ARM::BI_InterlockedOr8_acq:
1774 case clang::ARM::BI_InterlockedOr16_acq:
1775 case clang::ARM::BI_InterlockedOr_acq:
1776 case clang::ARM::BI_InterlockedOr64_acq:
1777 return MSVCIntrin::_InterlockedOr_acq;
1778 case clang::ARM::BI_InterlockedOr8_rel:
1779 case clang::ARM::BI_InterlockedOr16_rel:
1780 case clang::ARM::BI_InterlockedOr_rel:
1781 case clang::ARM::BI_InterlockedOr64_rel:
1782 return MSVCIntrin::_InterlockedOr_rel;
1783 case clang::ARM::BI_InterlockedOr8_nf:
1784 case clang::ARM::BI_InterlockedOr16_nf:
1785 case clang::ARM::BI_InterlockedOr_nf:
1786 case clang::ARM::BI_InterlockedOr64_nf:
1787 return MSVCIntrin::_InterlockedOr_nf;
1788 case clang::ARM::BI_InterlockedXor8_acq:
1789 case clang::ARM::BI_InterlockedXor16_acq:
1790 case clang::ARM::BI_InterlockedXor_acq:
1791 case clang::ARM::BI_InterlockedXor64_acq:
1792 return MSVCIntrin::_InterlockedXor_acq;
1793 case clang::ARM::BI_InterlockedXor8_rel:
1794 case clang::ARM::BI_InterlockedXor16_rel:
1795 case clang::ARM::BI_InterlockedXor_rel:
1796 case clang::ARM::BI_InterlockedXor64_rel:
1797 return MSVCIntrin::_InterlockedXor_rel;
1798 case clang::ARM::BI_InterlockedXor8_nf:
1799 case clang::ARM::BI_InterlockedXor16_nf:
1800 case clang::ARM::BI_InterlockedXor_nf:
1801 case clang::ARM::BI_InterlockedXor64_nf:
1802 return MSVCIntrin::_InterlockedXor_nf;
1803 case clang::ARM::BI_InterlockedAnd8_acq:
1804 case clang::ARM::BI_InterlockedAnd16_acq:
1805 case clang::ARM::BI_InterlockedAnd_acq:
1806 case clang::ARM::BI_InterlockedAnd64_acq:
1807 return MSVCIntrin::_InterlockedAnd_acq;
1808 case clang::ARM::BI_InterlockedAnd8_rel:
1809 case clang::ARM::BI_InterlockedAnd16_rel:
1810 case clang::ARM::BI_InterlockedAnd_rel:
1811 case clang::ARM::BI_InterlockedAnd64_rel:
1812 return MSVCIntrin::_InterlockedAnd_rel;
1813 case clang::ARM::BI_InterlockedAnd8_nf:
1814 case clang::ARM::BI_InterlockedAnd16_nf:
1815 case clang::ARM::BI_InterlockedAnd_nf:
1816 case clang::ARM::BI_InterlockedAnd64_nf:
1817 return MSVCIntrin::_InterlockedAnd_nf;
1818 case clang::ARM::BI_InterlockedIncrement16_acq:
1819 case clang::ARM::BI_InterlockedIncrement_acq:
1820 case clang::ARM::BI_InterlockedIncrement64_acq:
1821 return MSVCIntrin::_InterlockedIncrement_acq;
1822 case clang::ARM::BI_InterlockedIncrement16_rel:
1823 case clang::ARM::BI_InterlockedIncrement_rel:
1824 case clang::ARM::BI_InterlockedIncrement64_rel:
1825 return MSVCIntrin::_InterlockedIncrement_rel;
1826 case clang::ARM::BI_InterlockedIncrement16_nf:
1827 case clang::ARM::BI_InterlockedIncrement_nf:
1828 case clang::ARM::BI_InterlockedIncrement64_nf:
1829 return MSVCIntrin::_InterlockedIncrement_nf;
1830 case clang::ARM::BI_InterlockedDecrement16_acq:
1831 case clang::ARM::BI_InterlockedDecrement_acq:
1832 case clang::ARM::BI_InterlockedDecrement64_acq:
1833 return MSVCIntrin::_InterlockedDecrement_acq;
1834 case clang::ARM::BI_InterlockedDecrement16_rel:
1835 case clang::ARM::BI_InterlockedDecrement_rel:
1836 case clang::ARM::BI_InterlockedDecrement64_rel:
1837 return MSVCIntrin::_InterlockedDecrement_rel;
1838 case clang::ARM::BI_InterlockedDecrement16_nf:
1839 case clang::ARM::BI_InterlockedDecrement_nf:
1840 case clang::ARM::BI_InterlockedDecrement64_nf:
1841 return MSVCIntrin::_InterlockedDecrement_nf;
1842 }
1843 llvm_unreachable("must return from switch");
1844}
1845
1846static std::optional<CodeGenFunction::MSVCIntrin>
1847translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1848 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1849 switch (BuiltinID) {
1850 default:
1851 return std::nullopt;
1852 case clang::AArch64::BI_BitScanForward:
1853 case clang::AArch64::BI_BitScanForward64:
1854 return MSVCIntrin::_BitScanForward;
1855 case clang::AArch64::BI_BitScanReverse:
1856 case clang::AArch64::BI_BitScanReverse64:
1857 return MSVCIntrin::_BitScanReverse;
1858 case clang::AArch64::BI_InterlockedAnd64:
1859 return MSVCIntrin::_InterlockedAnd;
1860 case clang::AArch64::BI_InterlockedExchange64:
1861 return MSVCIntrin::_InterlockedExchange;
1862 case clang::AArch64::BI_InterlockedExchangeAdd64:
1863 return MSVCIntrin::_InterlockedExchangeAdd;
1864 case clang::AArch64::BI_InterlockedExchangeSub64:
1865 return MSVCIntrin::_InterlockedExchangeSub;
1866 case clang::AArch64::BI_InterlockedOr64:
1867 return MSVCIntrin::_InterlockedOr;
1868 case clang::AArch64::BI_InterlockedXor64:
1869 return MSVCIntrin::_InterlockedXor;
1870 case clang::AArch64::BI_InterlockedDecrement64:
1871 return MSVCIntrin::_InterlockedDecrement;
1872 case clang::AArch64::BI_InterlockedIncrement64:
1873 return MSVCIntrin::_InterlockedIncrement;
1874 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1875 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1876 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1877 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1878 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1879 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1880 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1881 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1882 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1883 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1884 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1885 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1886 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1887 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1888 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1889 case clang::AArch64::BI_InterlockedExchange8_acq:
1890 case clang::AArch64::BI_InterlockedExchange16_acq:
1891 case clang::AArch64::BI_InterlockedExchange_acq:
1892 case clang::AArch64::BI_InterlockedExchange64_acq:
1893 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1894 return MSVCIntrin::_InterlockedExchange_acq;
1895 case clang::AArch64::BI_InterlockedExchange8_rel:
1896 case clang::AArch64::BI_InterlockedExchange16_rel:
1897 case clang::AArch64::BI_InterlockedExchange_rel:
1898 case clang::AArch64::BI_InterlockedExchange64_rel:
1899 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1900 return MSVCIntrin::_InterlockedExchange_rel;
1901 case clang::AArch64::BI_InterlockedExchange8_nf:
1902 case clang::AArch64::BI_InterlockedExchange16_nf:
1903 case clang::AArch64::BI_InterlockedExchange_nf:
1904 case clang::AArch64::BI_InterlockedExchange64_nf:
1905 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1906 return MSVCIntrin::_InterlockedExchange_nf;
1907 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1908 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1909 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1910 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1911 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1912 return MSVCIntrin::_InterlockedCompareExchange_acq;
1913 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1914 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1915 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1916 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1917 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1918 return MSVCIntrin::_InterlockedCompareExchange_rel;
1919 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1920 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1921 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1922 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1923 return MSVCIntrin::_InterlockedCompareExchange_nf;
1924 case clang::AArch64::BI_InterlockedCompareExchange128:
1925 return MSVCIntrin::_InterlockedCompareExchange128;
1926 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1927 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1928 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1929 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1930 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1931 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1932 case clang::AArch64::BI_InterlockedOr8_acq:
1933 case clang::AArch64::BI_InterlockedOr16_acq:
1934 case clang::AArch64::BI_InterlockedOr_acq:
1935 case clang::AArch64::BI_InterlockedOr64_acq:
1936 return MSVCIntrin::_InterlockedOr_acq;
1937 case clang::AArch64::BI_InterlockedOr8_rel:
1938 case clang::AArch64::BI_InterlockedOr16_rel:
1939 case clang::AArch64::BI_InterlockedOr_rel:
1940 case clang::AArch64::BI_InterlockedOr64_rel:
1941 return MSVCIntrin::_InterlockedOr_rel;
1942 case clang::AArch64::BI_InterlockedOr8_nf:
1943 case clang::AArch64::BI_InterlockedOr16_nf:
1944 case clang::AArch64::BI_InterlockedOr_nf:
1945 case clang::AArch64::BI_InterlockedOr64_nf:
1946 return MSVCIntrin::_InterlockedOr_nf;
1947 case clang::AArch64::BI_InterlockedXor8_acq:
1948 case clang::AArch64::BI_InterlockedXor16_acq:
1949 case clang::AArch64::BI_InterlockedXor_acq:
1950 case clang::AArch64::BI_InterlockedXor64_acq:
1951 return MSVCIntrin::_InterlockedXor_acq;
1952 case clang::AArch64::BI_InterlockedXor8_rel:
1953 case clang::AArch64::BI_InterlockedXor16_rel:
1954 case clang::AArch64::BI_InterlockedXor_rel:
1955 case clang::AArch64::BI_InterlockedXor64_rel:
1956 return MSVCIntrin::_InterlockedXor_rel;
1957 case clang::AArch64::BI_InterlockedXor8_nf:
1958 case clang::AArch64::BI_InterlockedXor16_nf:
1959 case clang::AArch64::BI_InterlockedXor_nf:
1960 case clang::AArch64::BI_InterlockedXor64_nf:
1961 return MSVCIntrin::_InterlockedXor_nf;
1962 case clang::AArch64::BI_InterlockedAnd8_acq:
1963 case clang::AArch64::BI_InterlockedAnd16_acq:
1964 case clang::AArch64::BI_InterlockedAnd_acq:
1965 case clang::AArch64::BI_InterlockedAnd64_acq:
1966 return MSVCIntrin::_InterlockedAnd_acq;
1967 case clang::AArch64::BI_InterlockedAnd8_rel:
1968 case clang::AArch64::BI_InterlockedAnd16_rel:
1969 case clang::AArch64::BI_InterlockedAnd_rel:
1970 case clang::AArch64::BI_InterlockedAnd64_rel:
1971 return MSVCIntrin::_InterlockedAnd_rel;
1972 case clang::AArch64::BI_InterlockedAnd8_nf:
1973 case clang::AArch64::BI_InterlockedAnd16_nf:
1974 case clang::AArch64::BI_InterlockedAnd_nf:
1975 case clang::AArch64::BI_InterlockedAnd64_nf:
1976 return MSVCIntrin::_InterlockedAnd_nf;
1977 case clang::AArch64::BI_InterlockedIncrement16_acq:
1978 case clang::AArch64::BI_InterlockedIncrement_acq:
1979 case clang::AArch64::BI_InterlockedIncrement64_acq:
1980 return MSVCIntrin::_InterlockedIncrement_acq;
1981 case clang::AArch64::BI_InterlockedIncrement16_rel:
1982 case clang::AArch64::BI_InterlockedIncrement_rel:
1983 case clang::AArch64::BI_InterlockedIncrement64_rel:
1984 return MSVCIntrin::_InterlockedIncrement_rel;
1985 case clang::AArch64::BI_InterlockedIncrement16_nf:
1986 case clang::AArch64::BI_InterlockedIncrement_nf:
1987 case clang::AArch64::BI_InterlockedIncrement64_nf:
1988 return MSVCIntrin::_InterlockedIncrement_nf;
1989 case clang::AArch64::BI_InterlockedDecrement16_acq:
1990 case clang::AArch64::BI_InterlockedDecrement_acq:
1991 case clang::AArch64::BI_InterlockedDecrement64_acq:
1992 return MSVCIntrin::_InterlockedDecrement_acq;
1993 case clang::AArch64::BI_InterlockedDecrement16_rel:
1994 case clang::AArch64::BI_InterlockedDecrement_rel:
1995 case clang::AArch64::BI_InterlockedDecrement64_rel:
1996 return MSVCIntrin::_InterlockedDecrement_rel;
1997 case clang::AArch64::BI_InterlockedDecrement16_nf:
1998 case clang::AArch64::BI_InterlockedDecrement_nf:
1999 case clang::AArch64::BI_InterlockedDecrement64_nf:
2000 return MSVCIntrin::_InterlockedDecrement_nf;
2001 }
2002 llvm_unreachable("must return from switch");
2003}
2004
2005static std::optional<CodeGenFunction::MSVCIntrin>
2006translateX86ToMsvcIntrin(unsigned BuiltinID) {
2007 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
2008 switch (BuiltinID) {
2009 default:
2010 return std::nullopt;
2011 case clang::X86::BI_BitScanForward:
2012 case clang::X86::BI_BitScanForward64:
2013 return MSVCIntrin::_BitScanForward;
2014 case clang::X86::BI_BitScanReverse:
2015 case clang::X86::BI_BitScanReverse64:
2016 return MSVCIntrin::_BitScanReverse;
2017 case clang::X86::BI_InterlockedAnd64:
2018 return MSVCIntrin::_InterlockedAnd;
2019 case clang::X86::BI_InterlockedCompareExchange128:
2020 return MSVCIntrin::_InterlockedCompareExchange128;
2021 case clang::X86::BI_InterlockedExchange64:
2022 return MSVCIntrin::_InterlockedExchange;
2023 case clang::X86::BI_InterlockedExchangeAdd64:
2024 return MSVCIntrin::_InterlockedExchangeAdd;
2025 case clang::X86::BI_InterlockedExchangeSub64:
2026 return MSVCIntrin::_InterlockedExchangeSub;
2027 case clang::X86::BI_InterlockedOr64:
2028 return MSVCIntrin::_InterlockedOr;
2029 case clang::X86::BI_InterlockedXor64:
2030 return MSVCIntrin::_InterlockedXor;
2031 case clang::X86::BI_InterlockedDecrement64:
2032 return MSVCIntrin::_InterlockedDecrement;
2033 case clang::X86::BI_InterlockedIncrement64:
2034 return MSVCIntrin::_InterlockedIncrement;
2035 }
2036 llvm_unreachable("must return from switch");
2037}
2038
2039// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2040Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2041 const CallExpr *E) {
2042 switch (BuiltinID) {
2043 case MSVCIntrin::_BitScanForward:
2044 case MSVCIntrin::_BitScanReverse: {
2045 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2046 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2047
2048 llvm::Type *ArgType = ArgValue->getType();
2049 llvm::Type *IndexType = IndexAddress.getElementType();
2050 llvm::Type *ResultType = ConvertType(E->getType());
2051
2052 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2053 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2054 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2055
2056 BasicBlock *Begin = Builder.GetInsertBlock();
2057 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2058 Builder.SetInsertPoint(End);
2059 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2060
2061 Builder.SetInsertPoint(Begin);
2062 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2063 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2064 Builder.CreateCondBr(IsZero, End, NotZero);
2065 Result->addIncoming(ResZero, Begin);
2066
2067 Builder.SetInsertPoint(NotZero);
2068
2069 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2070 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2071 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2072 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2073 Builder.CreateStore(ZeroCount, IndexAddress, false);
2074 } else {
2075 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2076 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2077
2078 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2079 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2080 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2081 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2082 Builder.CreateStore(Index, IndexAddress, false);
2083 }
2084 Builder.CreateBr(End);
2085 Result->addIncoming(ResOne, NotZero);
2086
2087 Builder.SetInsertPoint(End);
2088 return Result;
2089 }
2090 case MSVCIntrin::_InterlockedAnd:
2091 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2092 case MSVCIntrin::_InterlockedExchange:
2093 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2094 case MSVCIntrin::_InterlockedExchangeAdd:
2095 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2096 case MSVCIntrin::_InterlockedExchangeSub:
2097 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2098 case MSVCIntrin::_InterlockedOr:
2099 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2100 case MSVCIntrin::_InterlockedXor:
2101 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2102 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2103 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2104 AtomicOrdering::Acquire);
2105 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2106 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2107 AtomicOrdering::Release);
2108 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2109 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2110 AtomicOrdering::Monotonic);
2111 case MSVCIntrin::_InterlockedExchange_acq:
2112 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2113 AtomicOrdering::Acquire);
2114 case MSVCIntrin::_InterlockedExchange_rel:
2115 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2116 AtomicOrdering::Release);
2117 case MSVCIntrin::_InterlockedExchange_nf:
2118 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2119 AtomicOrdering::Monotonic);
2120 case MSVCIntrin::_InterlockedCompareExchange:
2121 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2122 case MSVCIntrin::_InterlockedCompareExchange_acq:
2123 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2124 case MSVCIntrin::_InterlockedCompareExchange_rel:
2125 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2126 case MSVCIntrin::_InterlockedCompareExchange_nf:
2127 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2128 case MSVCIntrin::_InterlockedCompareExchange128:
2130 *this, E, AtomicOrdering::SequentiallyConsistent);
2131 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2132 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2133 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2134 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2135 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2136 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2137 case MSVCIntrin::_InterlockedOr_acq:
2138 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2139 AtomicOrdering::Acquire);
2140 case MSVCIntrin::_InterlockedOr_rel:
2141 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2142 AtomicOrdering::Release);
2143 case MSVCIntrin::_InterlockedOr_nf:
2144 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2145 AtomicOrdering::Monotonic);
2146 case MSVCIntrin::_InterlockedXor_acq:
2147 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2148 AtomicOrdering::Acquire);
2149 case MSVCIntrin::_InterlockedXor_rel:
2150 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2151 AtomicOrdering::Release);
2152 case MSVCIntrin::_InterlockedXor_nf:
2153 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2154 AtomicOrdering::Monotonic);
2155 case MSVCIntrin::_InterlockedAnd_acq:
2156 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2157 AtomicOrdering::Acquire);
2158 case MSVCIntrin::_InterlockedAnd_rel:
2159 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2160 AtomicOrdering::Release);
2161 case MSVCIntrin::_InterlockedAnd_nf:
2162 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2163 AtomicOrdering::Monotonic);
2164 case MSVCIntrin::_InterlockedIncrement_acq:
2165 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2166 case MSVCIntrin::_InterlockedIncrement_rel:
2167 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2168 case MSVCIntrin::_InterlockedIncrement_nf:
2169 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2170 case MSVCIntrin::_InterlockedDecrement_acq:
2171 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2172 case MSVCIntrin::_InterlockedDecrement_rel:
2173 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2174 case MSVCIntrin::_InterlockedDecrement_nf:
2175 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2176
2177 case MSVCIntrin::_InterlockedDecrement:
2178 return EmitAtomicDecrementValue(*this, E);
2179 case MSVCIntrin::_InterlockedIncrement:
2180 return EmitAtomicIncrementValue(*this, E);
2181
2182 case MSVCIntrin::__fastfail: {
2183 // Request immediate process termination from the kernel. The instruction
2184 // sequences to do this are documented on MSDN:
2185 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2186 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2187 StringRef Asm, Constraints;
2188 switch (ISA) {
2189 default:
2190 ErrorUnsupported(E, "__fastfail call for this architecture");
2191 break;
2192 case llvm::Triple::x86:
2193 case llvm::Triple::x86_64:
2194 Asm = "int $$0x29";
2195 Constraints = "{cx}";
2196 break;
2197 case llvm::Triple::thumb:
2198 Asm = "udf #251";
2199 Constraints = "{r0}";
2200 break;
2201 case llvm::Triple::aarch64:
2202 Asm = "brk #0xF003";
2203 Constraints = "{w0}";
2204 }
2205 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2206 llvm::InlineAsm *IA =
2207 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2208 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2209 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2210 llvm::Attribute::NoReturn);
2211 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2212 CI->setAttributes(NoReturnAttr);
2213 return CI;
2214 }
2215 }
2216 llvm_unreachable("Incorrect MSVC intrinsic!");
2217}
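// Illustrative sketch (reference semantics, assumed prototypes; not part of
// this file): the control flow built for _BitScanReverse above behaves like
// this portable C++.
static unsigned char bitScanReverseReference(unsigned long *Index,
                                             unsigned long Mask) {
  if (Mask == 0)
    return 0;                                  // *Index is left untouched
  const unsigned Width = 8 * sizeof(Mask);
  unsigned LeadingZeros = 0;
  for (unsigned long M = Mask; !(M >> (Width - 1)); M <<= 1)
    ++LeadingZeros;                            // count leading zero bits
  *Index = (Width - 1) - LeadingZeros;         // position of the highest set bit
  return 1;
}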
2218
2219namespace {
2220// ARC cleanup for __builtin_os_log_format
2221struct CallObjCArcUse final : EHScopeStack::Cleanup {
2222 CallObjCArcUse(llvm::Value *object) : object(object) {}
2223 llvm::Value *object;
2224
2225 void Emit(CodeGenFunction &CGF, Flags flags) override {
2226 CGF.EmitARCIntrinsicUse(object);
2227 }
2228};
2229}
2230
2232 BuiltinCheckKind Kind) {
2233 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2234 "Unsupported builtin check kind");
2235
2236 Value *ArgValue = EmitScalarExpr(E);
2237 if (!SanOpts.has(SanitizerKind::Builtin))
2238 return ArgValue;
2239
2240 SanitizerScope SanScope(this);
2241 Value *Cond = Builder.CreateICmpNE(
2242 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2243 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2244 SanitizerHandler::InvalidBuiltin,
2246 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2247 {});
2248 return ArgValue;
2249}
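// Illustrative note (not part of this file): under -fsanitize=builtin this
// wraps e.g. __builtin_clz(V) and __builtin_ctz(V) in a V != 0 check, since a
// zero argument is undefined for those builtins; without the sanitizer the
// argument is simply emitted and returned unchanged.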
2250
2252 Value *ArgValue = EvaluateExprAsBool(E);
2253 if (!SanOpts.has(SanitizerKind::Builtin))
2254 return ArgValue;
2255
2256 SanitizerScope SanScope(this);
2257 EmitCheck(
2258 std::make_pair(ArgValue, SanitizerKind::Builtin),
2259 SanitizerHandler::InvalidBuiltin,
2261 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2262 std::nullopt);
2263 return ArgValue;
2264}
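// Illustrative note (an assumption about the caller, not part of this file):
// this is the -fsanitize=builtin instrumentation for __builtin_assume, so an
// argument that evaluates to false reaches the invalid-builtin handler
// instead of silently feeding an llvm.assume with a false condition.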
2265
2266static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2267 return CGF.Builder.CreateBinaryIntrinsic(
2268 Intrinsic::abs, ArgValue,
2269 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2270}
2271
2273 bool SanitizeOverflow) {
2274 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2275
2276 // Try to eliminate overflow check.
2277 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2278 if (!VCI->isMinSignedValue())
2279 return EmitAbs(CGF, ArgValue, true);
2280 }
2281
2282 CodeGenFunction::SanitizerScope SanScope(&CGF);
2283
2284 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2285 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2286 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2287 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2288 Value *NotOverflow = CGF.Builder.CreateNot(
2289 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2290
2291 // TODO: support -ftrapv-handler.
2292 if (SanitizeOverflow) {
2293 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2294 SanitizerHandler::NegateOverflow,
2295 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2297 {ArgValue});
2298 } else
2299 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2300
2301 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2302 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2303}
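// Illustrative sketch (hypothetical helper, not part of this file): the only
// input that can trip the check above is the minimum signed value, because
// 0 - INT_MIN is not representable; a portable equivalent of the guarded
// negate-and-select sequence:
static bool checkedAbs(int V, int &Out) {
  int Negated;
  if (__builtin_sub_overflow(0, V, &Negated))  // overflows only for INT_MIN
    return false;                              // report instead of trapping
  Out = V < 0 ? Negated : V;                   // select |V|
  return true;
}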
2304
2305/// Get the argument type for arguments to os_log_helper.
2307 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2308 return C.getCanonicalType(UnsignedTy);
2309}
2310
2313 CharUnits BufferAlignment) {
2314 ASTContext &Ctx = getContext();
2315
2317 {
2318 raw_svector_ostream OS(Name);
2319 OS << "__os_log_helper";
2320 OS << "_" << BufferAlignment.getQuantity();
2321 OS << "_" << int(Layout.getSummaryByte());
2322 OS << "_" << int(Layout.getNumArgsByte());
2323 for (const auto &Item : Layout.Items)
2324 OS << "_" << int(Item.getSizeByte()) << "_"
2325 << int(Item.getDescriptorByte());
2326 }
2327
2328 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2329 return F;
2330
2332 FunctionArgList Args;
2333 Args.push_back(ImplicitParamDecl::Create(
2334 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2336 ArgTys.emplace_back(Ctx.VoidPtrTy);
2337
2338 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2339 char Size = Layout.Items[I].getSizeByte();
2340 if (!Size)
2341 continue;
2342
2343 QualType ArgTy = getOSLogArgType(Ctx, Size);
2344 Args.push_back(ImplicitParamDecl::Create(
2345 Ctx, nullptr, SourceLocation(),
2346 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2348 ArgTys.emplace_back(ArgTy);
2349 }
2350
2351 QualType ReturnTy = Ctx.VoidTy;
2352
2353 // The helper function has linkonce_odr linkage to enable the linker to merge
2354 // identical functions. To ensure the merging always happens, 'noinline' is
2355 // attached to the function when compiling with -Oz.
2356 const CGFunctionInfo &FI =
2358 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2359 llvm::Function *Fn = llvm::Function::Create(
2360 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2361 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2362 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2364 Fn->setDoesNotThrow();
2365
2366 // Attach 'noinline' at -Oz.
2367 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2368 Fn->addFnAttr(llvm::Attribute::NoInline);
2369
2370 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2371 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2372
2373 // Create a scope with an artificial location for the body of this function.
2374 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2375
2376 CharUnits Offset;
2378 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2379 BufferAlignment);
2380 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2381 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2382 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2383 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2384
2385 unsigned I = 1;
2386 for (const auto &Item : Layout.Items) {
2388 Builder.getInt8(Item.getDescriptorByte()),
2389 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2391 Builder.getInt8(Item.getSizeByte()),
2392 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2393
2394 CharUnits Size = Item.size();
2395 if (!Size.getQuantity())
2396 continue;
2397
2398 Address Arg = GetAddrOfLocalVar(Args[I]);
2399 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2400 Addr = Addr.withElementType(Arg.getElementType());
2402 Offset += Size;
2403 ++I;
2404 }
2405
2407
2408 return Fn;
2409}
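// Illustrative note (layout summary, not part of this file): the helper
// emitted above serializes into the caller-provided buffer as
//   [ summary : 1 byte ][ numArgs : 1 byte ]
// followed, for every item, by
//   [ descriptor : 1 byte ][ size : 1 byte ][ data : 'size' bytes ]
// which is exactly the sequence of CreateConstByteGEP stores in the loop.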
2410
2412 assert(E.getNumArgs() >= 2 &&
2413 "__builtin_os_log_format takes at least 2 arguments");
2414 ASTContext &Ctx = getContext();
2417 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2418 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2419
2420 // Ignore argument 1, the format string. It is not currently used.
2421 CallArgList Args;
2422 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2423
2424 for (const auto &Item : Layout.Items) {
2425 int Size = Item.getSizeByte();
2426 if (!Size)
2427 continue;
2428
2429 llvm::Value *ArgVal;
2430
2431 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2432 uint64_t Val = 0;
2433 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2434 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2435 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2436 } else if (const Expr *TheExpr = Item.getExpr()) {
2437 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2438
2439 // If a temporary object that requires destruction after the full
2440 // expression is passed, push a lifetime-extended cleanup to extend its
2441 // lifetime to the end of the enclosing block scope.
2442 auto LifetimeExtendObject = [&](const Expr *E) {
2443 E = E->IgnoreParenCasts();
2444 // Extend lifetimes of objects returned by function calls and message
2445 // sends.
2446
2447 // FIXME: We should do this in other cases in which temporaries are
2448 // created including arguments of non-ARC types (e.g., C++
2449 // temporaries).
2450 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2451 return true;
2452 return false;
2453 };
2454
2455 if (TheExpr->getType()->isObjCRetainableType() &&
2456 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2457 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2458 "Only a scalar can be an ObjC retainable type");
2459 if (!isa<Constant>(ArgVal)) {
2460 CleanupKind Cleanup = getARCCleanupKind();
2461 QualType Ty = TheExpr->getType();
2463 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2464 ArgVal = EmitARCRetain(Ty, ArgVal);
2465 Builder.CreateStore(ArgVal, Addr);
2466 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2468 Cleanup & EHCleanup);
2469
2470 // Push a clang.arc.use call to ensure the ARC optimizer knows that the
2471 // argument has to be kept alive.
2472 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2473 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2474 }
2475 }
2476 } else {
2477 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2478 }
2479
2480 unsigned ArgValSize =
2481 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2482 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2483 ArgValSize);
2484 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2485 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2486 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2487 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2488 Args.add(RValue::get(ArgVal), ArgTy);
2489 }
2490
2491 const CGFunctionInfo &FI =
2494 Layout, BufAddr.getAlignment());
2496 return RValue::get(BufAddr, *this);
2497}
2498
2500 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2501 WidthAndSignedness ResultInfo) {
2502 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2503 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2504 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2505}
2506
2508 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2509 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2510 const clang::Expr *ResultArg, QualType ResultQTy,
2511 WidthAndSignedness ResultInfo) {
2513 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2514 "Cannot specialize this multiply");
2515
2516 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2517 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2518
2519 llvm::Value *HasOverflow;
2520 llvm::Value *Result = EmitOverflowIntrinsic(
2521 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2522
2523 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2524 // however, since the original builtin had a signed result, we need to report
2525 // an overflow when the result is greater than INT_MAX.
2526 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2527 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2528
2529 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2530 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2531
2532 bool isVolatile =
2533 ResultArg->getType()->getPointeeType().isVolatileQualified();
2534 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2535 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2536 isVolatile);
2537 return RValue::get(HasOverflow);
2538}
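// Illustrative sketch (hypothetical usage, not part of this file): this is
// the source-level pattern the specialization above handles -- both operands
// unsigned, the result signed, and all three of the same width.
static bool mulUnsignedIntoSigned(unsigned A, unsigned B, int *Res) {
  // Lowered to llvm.umul.with.overflow plus an extra Result > INT_MAX check.
  return __builtin_mul_overflow(A, B, Res);
}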
2539
2540/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2541static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2542 WidthAndSignedness Op1Info,
2543 WidthAndSignedness Op2Info,
2544 WidthAndSignedness ResultInfo) {
2545 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2546 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2547 Op1Info.Signed != Op2Info.Signed;
2548}
2549
2550/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2551/// the generic checked-binop irgen.
2552static RValue
2554 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2555 WidthAndSignedness Op2Info,
2556 const clang::Expr *ResultArg, QualType ResultQTy,
2557 WidthAndSignedness ResultInfo) {
2558 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2559 Op2Info, ResultInfo) &&
2560 "Not a mixed-sign multiplication we can specialize");
2561
2562 // Emit the signed and unsigned operands.
2563 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2564 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2565 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2566 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2567 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2568 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2569
2570 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2571 if (SignedOpWidth < UnsignedOpWidth)
2572 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2573 if (UnsignedOpWidth < SignedOpWidth)
2574 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2575
2576 llvm::Type *OpTy = Signed->getType();
2577 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2578 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2579 llvm::Type *ResTy = ResultPtr.getElementType();
2580 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2581
2582 // Take the absolute value of the signed operand.
2583 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2584 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2585 llvm::Value *AbsSigned =
2586 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2587
2588 // Perform a checked unsigned multiplication.
2589 llvm::Value *UnsignedOverflow;
2590 llvm::Value *UnsignedResult =
2591 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2592 Unsigned, UnsignedOverflow);
2593
2594 llvm::Value *Overflow, *Result;
2595 if (ResultInfo.Signed) {
2596 // Signed overflow occurs if the result is greater than INT_MAX or less
2597 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2598 auto IntMax =
2599 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2600 llvm::Value *MaxResult =
2601 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2602 CGF.Builder.CreateZExt(IsNegative, OpTy));
2603 llvm::Value *SignedOverflow =
2604 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2605 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2606
2607 // Prepare the signed result (possibly by negating it).
2608 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2609 llvm::Value *SignedResult =
2610 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2611 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2612 } else {
2613 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2614 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2615 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2616 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2617 if (ResultInfo.Width < OpWidth) {
2618 auto IntMax =
2619 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2620 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2621 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2622 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2623 }
2624
2625 // Negate the product if it would be negative in infinite precision.
2626 Result = CGF.Builder.CreateSelect(
2627 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2628
2629 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2630 }
2631 assert(Overflow && Result && "Missing overflow or result");
2632
2633 bool isVolatile =
2634 ResultArg->getType()->getPointeeType().isVolatileQualified();
2635 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2636 isVolatile);
2637 return RValue::get(Overflow);
2638}
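// Illustrative sketch (hypothetical usage, not part of this file): a
// mixed-sign checked multiply that takes the path above. For A = -3 and
// B = 5u it computes |A| * B = 15 with llvm.umul.with.overflow, then negates
// the product because A was negative, storing -15 and returning false.
static bool mulMixedSign(int A, unsigned B, int *Res) {
  return __builtin_mul_overflow(A, B, Res);
}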
2639
2640static bool
2642 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2643 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2644 Ty = Ctx.getBaseElementType(Arr);
2645
2646 const auto *Record = Ty->getAsCXXRecordDecl();
2647 if (!Record)
2648 return false;
2649
2650 // We've already checked this type, or are in the process of checking it.
2651 if (!Seen.insert(Record).second)
2652 return false;
2653
2654 assert(Record->hasDefinition() &&
2655 "Incomplete types should already be diagnosed");
2656
2657 if (Record->isDynamicClass())
2658 return true;
2659
2660 for (FieldDecl *F : Record->fields()) {
2661 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2662 return true;
2663 }
2664 return false;
2665}
2666
2667/// Determine if the specified type requires laundering by checking if it is a
2668/// dynamic class type or contains a subobject which is a dynamic class type.
2670 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2671 return false;
2673 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2674}
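// Illustrative example (hypothetical types, not part of this file): with
// -fstrict-vtable-pointers the predicate above holds for any dynamic class
// and for any aggregate containing one, so __builtin_launder is honoured for
// those and folded away for plain trivial types.
struct LaunderDynBase { virtual void f() {} };   // dynamic class: needs laundering
struct LaunderHolder { LaunderDynBase Member; }; // contains one: also needs it
struct LaunderPlain { int X; };                  // no vptr anywhere: launder is a no-op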
2675
2676RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2677 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2678 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2679
2680 // The builtin's shift arg may have a different type than the source arg and
2681 // result, but the LLVM intrinsic uses the same type for all values.
2682 llvm::Type *Ty = Src->getType();
2683 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2684
2685 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2686 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2687 Function *F = CGM.getIntrinsic(IID, Ty);
2688 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2689}
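// Illustrative sketch (reference identity, not part of this file): a rotate
// by S on a W-bit value is a funnel shift with the source passed twice; the
// fshl/fshr intrinsics take the shift amount modulo W themselves, so no
// extra masking has to be emitted here.
static unsigned rotlReference(unsigned X, unsigned S) {
  const unsigned W = 8 * sizeof(X);
  S %= W;
  return S ? (X << S) | (X >> (W - S)) : X;     // guard the undefined X >> W case
}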
2690
2691// Map math builtins for long-double to f128 version.
2692static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2693 switch (BuiltinID) {
2694#define MUTATE_LDBL(func) \
2695 case Builtin::BI__builtin_##func##l: \
2696 return Builtin::BI__builtin_##func##f128;
2727 MUTATE_LDBL(nans)
2728 MUTATE_LDBL(inf)
2747 MUTATE_LDBL(huge_val)
2757#undef MUTATE_LDBL
2758 default:
2759 return BuiltinID;
2760 }
2761}
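// Illustrative note (not part of this file): on a PPC64 target whose long
// double is IEEE quad, a call such as __builtin_infl() is remapped by this
// table to __builtin_inff128() before any further lowering, so the f128
// handling below is reused unchanged.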
2762
2763static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2764 Value *V) {
2765 if (CGF.Builder.getIsFPConstrained() &&
2766 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2767 if (Value *Result =
2768 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2769 return Result;
2770 }
2771 return nullptr;
2772}
2773
2775 const FunctionDecl *FD) {
2776 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2777 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2778 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2779
2781 for (auto &&FormalTy : FnTy->params())
2782 Args.push_back(llvm::PoisonValue::get(FormalTy));
2783
2784 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2785}
2786
2787RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2788 const CallExpr *E,
2789 ReturnValueSlot ReturnValue) {
2790 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2791 "Should not codegen for consteval builtins");
2792
2793 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2794 // See if we can constant fold this builtin. If so, don't emit it at all.
2795 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2798 !Result.hasSideEffects()) {
2799 if (Result.Val.isInt())
2800 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2801 Result.Val.getInt()));
2802 if (Result.Val.isFloat())
2803 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2804 Result.Val.getFloat()));
2805 }
2806
2807 // If the current long-double semantics is IEEE 128-bit, replace math builtins
2808 // of long-double with their f128 equivalents.
2809 // TODO: This mutation should also be applied to targets other than PPC,
2810 // once the backend supports IEEE 128-bit style libcalls.
2811 if (getTarget().getTriple().isPPC64() &&
2812 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2813 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2814
2815 // If the builtin has been declared explicitly with an assembler label,
2816 // disable the specialized emitting below. Ideally we should communicate the
2817 // rename in IR, or at least avoid generating the intrinsic calls that are
2818 // likely to get lowered to the renamed library functions.
2819 const unsigned BuiltinIDIfNoAsmLabel =
2820 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2821
2822 std::optional<bool> ErrnoOverriden;
2823 // ErrnoOverriden is true if math-errno is overridden via the
2824 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2825 // which implies math-errno.
2826 if (E->hasStoredFPFeatures()) {
2827 FPOptionsOverride OP = E->getFPFeatures();
2828 if (OP.hasMathErrnoOverride())
2829 ErrnoOverriden = OP.getMathErrnoOverride();
2830 }
2831 // True if '__attribute__((optnone))' is used. This attribute overrides
2832 // fast-math, which implies math-errno.
2833 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2834
2835 // True if we are compiling with optimization and errno has been disabled
2836 // using '#pragma float_control(precise, off)', and
2837 // '__attribute__((optnone))' hasn't been seen.
2838 bool ErrnoOverridenToFalseWithOpt =
2839 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2840 CGM.getCodeGenOpts().OptimizationLevel != 0;
2841
2842 // There are LLVM math intrinsics/instructions corresponding to math library
2843 // functions except the LLVM op will never set errno while the math library
2844 // might. Also, math builtins have the same semantics as their math library
2845 // twins. Thus, we can transform math library and builtin calls to their
2846 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2847 // In case FP exceptions are enabled, the experimental versions of the
2848 // intrinsics model those.
2849 bool ConstAlways =
2850 getContext().BuiltinInfo.isConst(BuiltinID);
2851
2852 // There's a special case with the fma builtins where they are always const
2853 // if the target environment is GNU, or the target OS is Windows and we're
2854 // targeting the MSVCRT.dll environment.
2855 // FIXME: This list can become outdated. Need to find a way to get it some
2856 // other way.
2857 switch (BuiltinID) {
2858 case Builtin::BI__builtin_fma:
2859 case Builtin::BI__builtin_fmaf:
2860 case Builtin::BI__builtin_fmal:
2861 case Builtin::BI__builtin_fmaf16:
2862 case Builtin::BIfma:
2863 case Builtin::BIfmaf:
2864 case Builtin::BIfmal: {
2865 auto &Trip = CGM.getTriple();
2866 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2867 ConstAlways = true;
2868 break;
2869 }
2870 default:
2871 break;
2872 }
2873
2874 bool ConstWithoutErrnoAndExceptions =
2876 bool ConstWithoutExceptions =
2878
2879 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2880 // disabled.
2881 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2882 // or attributes that affect math-errno should prevent or allow math
2883 // intrinsics to be generated. Intrinsics are generated:
2884 // 1- In fast math mode, unless math-errno is overridden
2885 // via '#pragma float_control(precise, on)', or via an
2886 // '__attribute__((optnone))'.
2887 // 2- If math-errno was enabled on the command line but overridden
2888 // to false via '#pragma float_control(precise, off)' and
2889 // '__attribute__((optnone))' hasn't been used.
2890 // 3- If we are compiling with optimization and errno has been disabled
2891 // via '#pragma float_control(precise, off)', and
2892 // '__attribute__((optnone))' hasn't been used.
2893
2894 bool ConstWithoutErrnoOrExceptions =
2895 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2896 bool GenerateIntrinsics =
2897 (ConstAlways && !OptNone) ||
2898 (!getLangOpts().MathErrno &&
2899 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2900 if (!GenerateIntrinsics) {
2901 GenerateIntrinsics =
2902 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2903 if (!GenerateIntrinsics)
2904 GenerateIntrinsics =
2905 ConstWithoutErrnoOrExceptions &&
2906 (!getLangOpts().MathErrno &&
2907 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2908 if (!GenerateIntrinsics)
2909 GenerateIntrinsics =
2910 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2911 }
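  // Illustrative note (an assumption for exposition, not part of this file):
  // under -fno-math-errno a call such as sin(x) reaches this point with
  // GenerateIntrinsics == true and is lowered to llvm.sin.f64 by the switch
  // below, while with math-errno in effect (and no overriding pragma or
  // attribute) it stays a libcall so that errno can still be set.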
2912 if (GenerateIntrinsics) {
2913 switch (BuiltinIDIfNoAsmLabel) {
2914 case Builtin::BIacos:
2915 case Builtin::BIacosf:
2916 case Builtin::BIacosl:
2917 case Builtin::BI__builtin_acos:
2918 case Builtin::BI__builtin_acosf:
2919 case Builtin::BI__builtin_acosf16:
2920 case Builtin::BI__builtin_acosl:
2921 case Builtin::BI__builtin_acosf128:
2923 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2924
2925 case Builtin::BIasin:
2926 case Builtin::BIasinf:
2927 case Builtin::BIasinl:
2928 case Builtin::BI__builtin_asin:
2929 case Builtin::BI__builtin_asinf:
2930 case Builtin::BI__builtin_asinf16:
2931 case Builtin::BI__builtin_asinl:
2932 case Builtin::BI__builtin_asinf128:
2934 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2935
2936 case Builtin::BIatan:
2937 case Builtin::BIatanf:
2938 case Builtin::BIatanl:
2939 case Builtin::BI__builtin_atan:
2940 case Builtin::BI__builtin_atanf:
2941 case Builtin::BI__builtin_atanf16:
2942 case Builtin::BI__builtin_atanl:
2943 case Builtin::BI__builtin_atanf128:
2945 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2946
2947 case Builtin::BIatan2:
2948 case Builtin::BIatan2f:
2949 case Builtin::BIatan2l:
2950 case Builtin::BI__builtin_atan2:
2951 case Builtin::BI__builtin_atan2f:
2952 case Builtin::BI__builtin_atan2f16:
2953 case Builtin::BI__builtin_atan2l:
2954 case Builtin::BI__builtin_atan2f128:
2956 *this, E, Intrinsic::atan2,
2957 Intrinsic::experimental_constrained_atan2));
2958
2959 case Builtin::BIceil:
2960 case Builtin::BIceilf:
2961 case Builtin::BIceill:
2962 case Builtin::BI__builtin_ceil:
2963 case Builtin::BI__builtin_ceilf:
2964 case Builtin::BI__builtin_ceilf16:
2965 case Builtin::BI__builtin_ceill:
2966 case Builtin::BI__builtin_ceilf128:
2968 Intrinsic::ceil,
2969 Intrinsic::experimental_constrained_ceil));
2970
2971 case Builtin::BIcopysign:
2972 case Builtin::BIcopysignf:
2973 case Builtin::BIcopysignl:
2974 case Builtin::BI__builtin_copysign:
2975 case Builtin::BI__builtin_copysignf:
2976 case Builtin::BI__builtin_copysignf16:
2977 case Builtin::BI__builtin_copysignl:
2978 case Builtin::BI__builtin_copysignf128:
2979 return RValue::get(
2980 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2981
2982 case Builtin::BIcos:
2983 case Builtin::BIcosf:
2984 case Builtin::BIcosl:
2985 case Builtin::BI__builtin_cos:
2986 case Builtin::BI__builtin_cosf:
2987 case Builtin::BI__builtin_cosf16:
2988 case Builtin::BI__builtin_cosl:
2989 case Builtin::BI__builtin_cosf128:
2991 Intrinsic::cos,
2992 Intrinsic::experimental_constrained_cos));
2993
2994 case Builtin::BIcosh:
2995 case Builtin::BIcoshf:
2996 case Builtin::BIcoshl:
2997 case Builtin::BI__builtin_cosh:
2998 case Builtin::BI__builtin_coshf:
2999 case Builtin::BI__builtin_coshf16:
3000 case Builtin::BI__builtin_coshl:
3001 case Builtin::BI__builtin_coshf128:
3002 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3003 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3004
3005 case Builtin::BIexp:
3006 case Builtin::BIexpf:
3007 case Builtin::BIexpl:
3008 case Builtin::BI__builtin_exp:
3009 case Builtin::BI__builtin_expf:
3010 case Builtin::BI__builtin_expf16:
3011 case Builtin::BI__builtin_expl:
3012 case Builtin::BI__builtin_expf128:
3013 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3014 Intrinsic::exp,
3015 Intrinsic::experimental_constrained_exp));
3016
3017 case Builtin::BIexp2:
3018 case Builtin::BIexp2f:
3019 case Builtin::BIexp2l:
3020 case Builtin::BI__builtin_exp2:
3021 case Builtin::BI__builtin_exp2f:
3022 case Builtin::BI__builtin_exp2f16:
3023 case Builtin::BI__builtin_exp2l:
3024 case Builtin::BI__builtin_exp2f128:
3025 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3026 Intrinsic::exp2,
3027 Intrinsic::experimental_constrained_exp2));
3028 case Builtin::BI__builtin_exp10:
3029 case Builtin::BI__builtin_exp10f:
3030 case Builtin::BI__builtin_exp10f16:
3031 case Builtin::BI__builtin_exp10l:
3032 case Builtin::BI__builtin_exp10f128: {
3033 // TODO: strictfp support
3034 if (Builder.getIsFPConstrained())
3035 break;
3036 return RValue::get(
3037 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3038 }
3039 case Builtin::BIfabs:
3040 case Builtin::BIfabsf:
3041 case Builtin::BIfabsl:
3042 case Builtin::BI__builtin_fabs:
3043 case Builtin::BI__builtin_fabsf:
3044 case Builtin::BI__builtin_fabsf16:
3045 case Builtin::BI__builtin_fabsl:
3046 case Builtin::BI__builtin_fabsf128:
3047 return RValue::get(
3048 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3049
3050 case Builtin::BIfloor:
3051 case Builtin::BIfloorf:
3052 case Builtin::BIfloorl:
3053 case Builtin::BI__builtin_floor:
3054 case Builtin::BI__builtin_floorf:
3055 case Builtin::BI__builtin_floorf16:
3056 case Builtin::BI__builtin_floorl:
3057 case Builtin::BI__builtin_floorf128:
3058 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3059 Intrinsic::floor,
3060 Intrinsic::experimental_constrained_floor));
3061
3062 case Builtin::BIfma:
3063 case Builtin::BIfmaf:
3064 case Builtin::BIfmal:
3065 case Builtin::BI__builtin_fma:
3066 case Builtin::BI__builtin_fmaf:
3067 case Builtin::BI__builtin_fmaf16:
3068 case Builtin::BI__builtin_fmal:
3069 case Builtin::BI__builtin_fmaf128:
3070 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3071 Intrinsic::fma,
3072 Intrinsic::experimental_constrained_fma));
3073
3074 case Builtin::BIfmax:
3075 case Builtin::BIfmaxf:
3076 case Builtin::BIfmaxl:
3077 case Builtin::BI__builtin_fmax:
3078 case Builtin::BI__builtin_fmaxf:
3079 case Builtin::BI__builtin_fmaxf16:
3080 case Builtin::BI__builtin_fmaxl:
3081 case Builtin::BI__builtin_fmaxf128:
3082 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3083 Intrinsic::maxnum,
3084 Intrinsic::experimental_constrained_maxnum));
3085
3086 case Builtin::BIfmin:
3087 case Builtin::BIfminf:
3088 case Builtin::BIfminl:
3089 case Builtin::BI__builtin_fmin:
3090 case Builtin::BI__builtin_fminf:
3091 case Builtin::BI__builtin_fminf16:
3092 case Builtin::BI__builtin_fminl:
3093 case Builtin::BI__builtin_fminf128:
3094 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3095 Intrinsic::minnum,
3096 Intrinsic::experimental_constrained_minnum));
3097
3098 case Builtin::BIfmaximum_num:
3099 case Builtin::BIfmaximum_numf:
3100 case Builtin::BIfmaximum_numl:
3101 case Builtin::BI__builtin_fmaximum_num:
3102 case Builtin::BI__builtin_fmaximum_numf:
3103 case Builtin::BI__builtin_fmaximum_numf16:
3104 case Builtin::BI__builtin_fmaximum_numl:
3105 case Builtin::BI__builtin_fmaximum_numf128:
3106 return RValue::get(
3107 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3108
3109 case Builtin::BIfminimum_num:
3110 case Builtin::BIfminimum_numf:
3111 case Builtin::BIfminimum_numl:
3112 case Builtin::BI__builtin_fminimum_num:
3113 case Builtin::BI__builtin_fminimum_numf:
3114 case Builtin::BI__builtin_fminimum_numf16:
3115 case Builtin::BI__builtin_fminimum_numl:
3116 case Builtin::BI__builtin_fminimum_numf128:
3117 return RValue::get(
3118 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3119
3120 // fmod() is a special-case. It maps to the frem instruction rather than an
3121 // LLVM intrinsic.
3122 case Builtin::BIfmod:
3123 case Builtin::BIfmodf:
3124 case Builtin::BIfmodl:
3125 case Builtin::BI__builtin_fmod:
3126 case Builtin::BI__builtin_fmodf:
3127 case Builtin::BI__builtin_fmodf16:
3128 case Builtin::BI__builtin_fmodl:
3129 case Builtin::BI__builtin_fmodf128:
3130 case Builtin::BI__builtin_elementwise_fmod: {
3131 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3132 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3133 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3134 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3135 }
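// For example, 'fmod(x, y)' on doubles is emitted above as 'frem double %x, %y'
// (under the FP options installed by FPOptsRAII) rather than as an intrinsic.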
3136
3137 case Builtin::BIlog:
3138 case Builtin::BIlogf:
3139 case Builtin::BIlogl:
3140 case Builtin::BI__builtin_log:
3141 case Builtin::BI__builtin_logf:
3142 case Builtin::BI__builtin_logf16:
3143 case Builtin::BI__builtin_logl:
3144 case Builtin::BI__builtin_logf128:
3145 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3146 Intrinsic::log,
3147 Intrinsic::experimental_constrained_log));
3148
3149 case Builtin::BIlog10:
3150 case Builtin::BIlog10f:
3151 case Builtin::BIlog10l:
3152 case Builtin::BI__builtin_log10:
3153 case Builtin::BI__builtin_log10f:
3154 case Builtin::BI__builtin_log10f16:
3155 case Builtin::BI__builtin_log10l:
3156 case Builtin::BI__builtin_log10f128:
3157 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3158 Intrinsic::log10,
3159 Intrinsic::experimental_constrained_log10));
3160
3161 case Builtin::BIlog2:
3162 case Builtin::BIlog2f:
3163 case Builtin::BIlog2l:
3164 case Builtin::BI__builtin_log2:
3165 case Builtin::BI__builtin_log2f:
3166 case Builtin::BI__builtin_log2f16:
3167 case Builtin::BI__builtin_log2l:
3168 case Builtin::BI__builtin_log2f128:
3169 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3170 Intrinsic::log2,
3171 Intrinsic::experimental_constrained_log2));
3172
3173 case Builtin::BInearbyint:
3174 case Builtin::BInearbyintf:
3175 case Builtin::BInearbyintl:
3176 case Builtin::BI__builtin_nearbyint:
3177 case Builtin::BI__builtin_nearbyintf:
3178 case Builtin::BI__builtin_nearbyintl:
3179 case Builtin::BI__builtin_nearbyintf128:
3180 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3181 Intrinsic::nearbyint,
3182 Intrinsic::experimental_constrained_nearbyint));
3183
3184 case Builtin::BIpow:
3185 case Builtin::BIpowf:
3186 case Builtin::BIpowl:
3187 case Builtin::BI__builtin_pow:
3188 case Builtin::BI__builtin_powf:
3189 case Builtin::BI__builtin_powf16:
3190 case Builtin::BI__builtin_powl:
3191 case Builtin::BI__builtin_powf128:
3192 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3193 Intrinsic::pow,
3194 Intrinsic::experimental_constrained_pow));
3195
3196 case Builtin::BIrint:
3197 case Builtin::BIrintf:
3198 case Builtin::BIrintl:
3199 case Builtin::BI__builtin_rint:
3200 case Builtin::BI__builtin_rintf:
3201 case Builtin::BI__builtin_rintf16:
3202 case Builtin::BI__builtin_rintl:
3203 case Builtin::BI__builtin_rintf128:
3204 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3205 Intrinsic::rint,
3206 Intrinsic::experimental_constrained_rint));
3207
3208 case Builtin::BIround:
3209 case Builtin::BIroundf:
3210 case Builtin::BIroundl:
3211 case Builtin::BI__builtin_round:
3212 case Builtin::BI__builtin_roundf:
3213 case Builtin::BI__builtin_roundf16:
3214 case Builtin::BI__builtin_roundl:
3215 case Builtin::BI__builtin_roundf128:
3216 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3217 Intrinsic::round,
3218 Intrinsic::experimental_constrained_round));
3219
3220 case Builtin::BIroundeven:
3221 case Builtin::BIroundevenf:
3222 case Builtin::BIroundevenl:
3223 case Builtin::BI__builtin_roundeven:
3224 case Builtin::BI__builtin_roundevenf:
3225 case Builtin::BI__builtin_roundevenf16:
3226 case Builtin::BI__builtin_roundevenl:
3227 case Builtin::BI__builtin_roundevenf128:
3228 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3229 Intrinsic::roundeven,
3230 Intrinsic::experimental_constrained_roundeven));
3231
3232 case Builtin::BIsin:
3233 case Builtin::BIsinf:
3234 case Builtin::BIsinl:
3235 case Builtin::BI__builtin_sin:
3236 case Builtin::BI__builtin_sinf:
3237 case Builtin::BI__builtin_sinf16:
3238 case Builtin::BI__builtin_sinl:
3239 case Builtin::BI__builtin_sinf128:
3240 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3241 Intrinsic::sin,
3242 Intrinsic::experimental_constrained_sin));
3243
3244 case Builtin::BIsinh:
3245 case Builtin::BIsinhf:
3246 case Builtin::BIsinhl:
3247 case Builtin::BI__builtin_sinh:
3248 case Builtin::BI__builtin_sinhf:
3249 case Builtin::BI__builtin_sinhf16:
3250 case Builtin::BI__builtin_sinhl:
3251 case Builtin::BI__builtin_sinhf128:
3252 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3253 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3254
3255 case Builtin::BI__builtin_sincos:
3256 case Builtin::BI__builtin_sincosf:
3257 case Builtin::BI__builtin_sincosf16:
3258 case Builtin::BI__builtin_sincosl:
3259 case Builtin::BI__builtin_sincosf128:
3260 emitSincosBuiltin(*this, E, Intrinsic::sincos);
3261 return RValue::get(nullptr);
3262
3263 case Builtin::BIsqrt:
3264 case Builtin::BIsqrtf:
3265 case Builtin::BIsqrtl:
3266 case Builtin::BI__builtin_sqrt:
3267 case Builtin::BI__builtin_sqrtf:
3268 case Builtin::BI__builtin_sqrtf16:
3269 case Builtin::BI__builtin_sqrtl:
3270 case Builtin::BI__builtin_sqrtf128:
3271 case Builtin::BI__builtin_elementwise_sqrt: {
3272 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3273 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3274 SetSqrtFPAccuracy(Call);
3275 return RValue::get(Call);
3276 }
3277
3278 case Builtin::BItan:
3279 case Builtin::BItanf:
3280 case Builtin::BItanl:
3281 case Builtin::BI__builtin_tan:
3282 case Builtin::BI__builtin_tanf:
3283 case Builtin::BI__builtin_tanf16:
3284 case Builtin::BI__builtin_tanl:
3285 case Builtin::BI__builtin_tanf128:
3286 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3287 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3288
3289 case Builtin::BItanh:
3290 case Builtin::BItanhf:
3291 case Builtin::BItanhl:
3292 case Builtin::BI__builtin_tanh:
3293 case Builtin::BI__builtin_tanhf:
3294 case Builtin::BI__builtin_tanhf16:
3295 case Builtin::BI__builtin_tanhl:
3296 case Builtin::BI__builtin_tanhf128:
3297 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3298 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3299
3300 case Builtin::BItrunc:
3301 case Builtin::BItruncf:
3302 case Builtin::BItruncl:
3303 case Builtin::BI__builtin_trunc:
3304 case Builtin::BI__builtin_truncf:
3305 case Builtin::BI__builtin_truncf16:
3306 case Builtin::BI__builtin_truncl:
3307 case Builtin::BI__builtin_truncf128:
3308 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3309 Intrinsic::trunc,
3310 Intrinsic::experimental_constrained_trunc));
3311
3312 case Builtin::BIlround:
3313 case Builtin::BIlroundf:
3314 case Builtin::BIlroundl:
3315 case Builtin::BI__builtin_lround:
3316 case Builtin::BI__builtin_lroundf:
3317 case Builtin::BI__builtin_lroundl:
3318 case Builtin::BI__builtin_lroundf128:
3319 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3320 *this, E, Intrinsic::lround,
3321 Intrinsic::experimental_constrained_lround));
3322
3323 case Builtin::BIllround:
3324 case Builtin::BIllroundf:
3325 case Builtin::BIllroundl:
3326 case Builtin::BI__builtin_llround:
3327 case Builtin::BI__builtin_llroundf:
3328 case Builtin::BI__builtin_llroundl:
3329 case Builtin::BI__builtin_llroundf128:
3330 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3331 *this, E, Intrinsic::llround,
3332 Intrinsic::experimental_constrained_llround));
3333
3334 case Builtin::BIlrint:
3335 case Builtin::BIlrintf:
3336 case Builtin::BIlrintl:
3337 case Builtin::BI__builtin_lrint:
3338 case Builtin::BI__builtin_lrintf:
3339 case Builtin::BI__builtin_lrintl:
3340 case Builtin::BI__builtin_lrintf128:
3341 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3342 *this, E, Intrinsic::lrint,
3343 Intrinsic::experimental_constrained_lrint));
3344
3345 case Builtin::BIllrint:
3346 case Builtin::BIllrintf:
3347 case Builtin::BIllrintl:
3348 case Builtin::BI__builtin_llrint:
3349 case Builtin::BI__builtin_llrintf:
3350 case Builtin::BI__builtin_llrintl:
3351 case Builtin::BI__builtin_llrintf128:
3352 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3353 *this, E, Intrinsic::llrint,
3354 Intrinsic::experimental_constrained_llrint));
3355 case Builtin::BI__builtin_ldexp:
3356 case Builtin::BI__builtin_ldexpf:
3357 case Builtin::BI__builtin_ldexpl:
3358 case Builtin::BI__builtin_ldexpf16:
3359 case Builtin::BI__builtin_ldexpf128: {
3360 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3361 *this, E, Intrinsic::ldexp,
3362 Intrinsic::experimental_constrained_ldexp));
3363 }
3364 default:
3365 break;
3366 }
3367 }
3368
3369 // Check NonnullAttribute/NullabilityArg and Alignment.
3370 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3371 unsigned ParmNum) {
3372 Value *Val = A.emitRawPointer(*this);
3373 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3374 ParmNum);
3375
3376 if (SanOpts.has(SanitizerKind::Alignment)) {
3377 SanitizerSet SkippedChecks;
3378 SkippedChecks.set(SanitizerKind::All);
3379 SkippedChecks.clear(SanitizerKind::Alignment);
3380 SourceLocation Loc = Arg->getExprLoc();
3381 // Strip an implicit cast.
3382 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3383 if (CE->getCastKind() == CK_BitCast)
3384 Arg = CE->getSubExpr();
3385 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3386 SkippedChecks);
3387 }
3388 };
3389
3390 switch (BuiltinIDIfNoAsmLabel) {
3391 default: break;
3392 case Builtin::BI__builtin___CFStringMakeConstantString:
3393 case Builtin::BI__builtin___NSStringMakeConstantString:
3394 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3395 case Builtin::BI__builtin_stdarg_start:
3396 case Builtin::BI__builtin_va_start:
3397 case Builtin::BI__va_start:
3398 case Builtin::BI__builtin_va_end:
3399 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3400 ? EmitScalarExpr(E->getArg(0))
3401 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3402 BuiltinID != Builtin::BI__builtin_va_end);
3403 return RValue::get(nullptr);
3404 case Builtin::BI__builtin_va_copy: {
3405 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3406 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3407 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3408 {DstPtr, SrcPtr});
3409 return RValue::get(nullptr);
3410 }
3411 case Builtin::BIabs:
3412 case Builtin::BIlabs:
3413 case Builtin::BIllabs:
3414 case Builtin::BI__builtin_abs:
3415 case Builtin::BI__builtin_labs:
3416 case Builtin::BI__builtin_llabs: {
3417 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3418
3419 Value *Result;
3420 switch (getLangOpts().getSignedOverflowBehavior()) {
3421 case LangOptions::SOB_Defined:
3422 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3423 break;
3424 case LangOptions::SOB_Undefined:
3425 if (!SanitizeOverflow) {
3426 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3427 break;
3428 }
3429 [[fallthrough]];
3430 case LangOptions::SOB_Trapping:
3431 // TODO: Somehow handle the corner case when the address of abs is taken.
3432 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3433 break;
3434 }
3435 return RValue::get(Result);
3436 }
3437 case Builtin::BI__builtin_complex: {
3438 Value *Real = EmitScalarExpr(E->getArg(0));
3439 Value *Imag = EmitScalarExpr(E->getArg(1));
3440 return RValue::getComplex({Real, Imag});
3441 }
3442 case Builtin::BI__builtin_conj:
3443 case Builtin::BI__builtin_conjf:
3444 case Builtin::BI__builtin_conjl:
3445 case Builtin::BIconj:
3446 case Builtin::BIconjf:
3447 case Builtin::BIconjl: {
3448 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3449 Value *Real = ComplexVal.first;
3450 Value *Imag = ComplexVal.second;
3451 Imag = Builder.CreateFNeg(Imag, "neg");
3452 return RValue::getComplex(std::make_pair(Real, Imag));
3453 }
3454 case Builtin::BI__builtin_creal:
3455 case Builtin::BI__builtin_crealf:
3456 case Builtin::BI__builtin_creall:
3457 case Builtin::BIcreal:
3458 case Builtin::BIcrealf:
3459 case Builtin::BIcreall: {
3460 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3461 return RValue::get(ComplexVal.first);
3462 }
3463
3464 case Builtin::BI__builtin_preserve_access_index: {
3465 // Only enable the preserved access index region when debug info
3466 // is available, as debug info is needed to preserve the user-level
3467 // access pattern.
3468 if (!getDebugInfo()) {
3469 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3470 return RValue::get(EmitScalarExpr(E->getArg(0)));
3471 }
3472
3473 // Nested builtin_preserve_access_index() not supported
3474 if (IsInPreservedAIRegion) {
3475 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3476 return RValue::get(EmitScalarExpr(E->getArg(0)));
3477 }
3478
3479 IsInPreservedAIRegion = true;
3480 Value *Res = EmitScalarExpr(E->getArg(0));
3481 IsInPreservedAIRegion = false;
3482 return RValue::get(Res);
3483 }
3484
3485 case Builtin::BI__builtin_cimag:
3486 case Builtin::BI__builtin_cimagf:
3487 case Builtin::BI__builtin_cimagl:
3488 case Builtin::BIcimag:
3489 case Builtin::BIcimagf:
3490 case Builtin::BIcimagl: {
3491 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3492 return RValue::get(ComplexVal.second);
3493 }
3494
3495 case Builtin::BI__builtin_clrsb:
3496 case Builtin::BI__builtin_clrsbl:
3497 case Builtin::BI__builtin_clrsbll: {
3498 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3499 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3500
3501 llvm::Type *ArgType = ArgValue->getType();
3502 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3503
3504 llvm::Type *ResultType = ConvertType(E->getType());
3505 Value *Zero = llvm::Constant::getNullValue(ArgType);
3506 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3507 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3508 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3509 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3510 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3511 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3512 "cast");
3513 return RValue::get(Result);
3514 }
3515 case Builtin::BI__builtin_ctzs:
3516 case Builtin::BI__builtin_ctz:
3517 case Builtin::BI__builtin_ctzl:
3518 case Builtin::BI__builtin_ctzll:
3519 case Builtin::BI__builtin_ctzg: {
3520 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3521 E->getNumArgs() > 1;
3522
3523 Value *ArgValue =
3524 HasFallback ? EmitScalarExpr(E->getArg(0))
3525 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3526
3527 llvm::Type *ArgType = ArgValue->getType();
3528 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3529
3530 llvm::Type *ResultType = ConvertType(E->getType());
3531 Value *ZeroUndef =
3532 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3533 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3534 if (Result->getType() != ResultType)
3535 Result =
3536 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3537 if (!HasFallback)
3538 return RValue::get(Result);
3539
3540 Value *Zero = Constant::getNullValue(ArgType);
3541 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3542 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3543 Value *ResultOrFallback =
3544 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3545 return RValue::get(ResultOrFallback);
3546 }
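// For example, '__builtin_ctzg(x, 32)' emits cttz plus the select above, so the
// result is the fallback value 32 whenever x == 0 instead of being poison.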
3547 case Builtin::BI__builtin_clzs:
3548 case Builtin::BI__builtin_clz:
3549 case Builtin::BI__builtin_clzl:
3550 case Builtin::BI__builtin_clzll:
3551 case Builtin::BI__builtin_clzg: {
3552 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3553 E->getNumArgs() > 1;
3554
3555 Value *ArgValue =
3556 HasFallback ? EmitScalarExpr(E->getArg(0))
3557 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3558
3559 llvm::Type *ArgType = ArgValue->getType();
3560 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3561
3562 llvm::Type *ResultType = ConvertType(E->getType());
3563 Value *ZeroUndef =
3564 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3565 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3566 if (Result->getType() != ResultType)
3567 Result =
3568 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3569 if (!HasFallback)
3570 return RValue::get(Result);
3571
3572 Value *Zero = Constant::getNullValue(ArgType);
3573 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3574 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3575 Value *ResultOrFallback =
3576 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3577 return RValue::get(ResultOrFallback);
3578 }
3579 case Builtin::BI__builtin_ffs:
3580 case Builtin::BI__builtin_ffsl:
3581 case Builtin::BI__builtin_ffsll: {
3582 // ffs(x) -> x ? cttz(x) + 1 : 0
3583 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3584
3585 llvm::Type *ArgType = ArgValue->getType();
3586 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3587
3588 llvm::Type *ResultType = ConvertType(E->getType());
3589 Value *Tmp =
3590 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3591 llvm::ConstantInt::get(ArgType, 1));
3592 Value *Zero = llvm::Constant::getNullValue(ArgType);
3593 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3594 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3595 if (Result->getType() != ResultType)
3596 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3597 "cast");
3598 return RValue::get(Result);
3599 }
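// For example, __builtin_ffs(0) yields 0 and __builtin_ffs(8) yields 4
// (cttz(8) == 3, plus one).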
3600 case Builtin::BI__builtin_parity:
3601 case Builtin::BI__builtin_parityl:
3602 case Builtin::BI__builtin_parityll: {
3603 // parity(x) -> ctpop(x) & 1
3604 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3605
3606 llvm::Type *ArgType = ArgValue->getType();
3607 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3608
3609 llvm::Type *ResultType = ConvertType(E->getType());
3610 Value *Tmp = Builder.CreateCall(F, ArgValue);
3611 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3612 if (Result->getType() != ResultType)
3613 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3614 "cast");
3615 return RValue::get(Result);
3616 }
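// For example, __builtin_parity(0b1011) becomes ctpop(0b1011) & 1 == 3 & 1 == 1.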
3617 case Builtin::BI__lzcnt16:
3618 case Builtin::BI__lzcnt:
3619 case Builtin::BI__lzcnt64: {
3620 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3621
3622 llvm::Type *ArgType = ArgValue->getType();
3623 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3624
3625 llvm::Type *ResultType = ConvertType(E->getType());
3626 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3627 if (Result->getType() != ResultType)
3628 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3629 "cast");
3630 return RValue::get(Result);
3631 }
3632 case Builtin::BI__popcnt16:
3633 case Builtin::BI__popcnt:
3634 case Builtin::BI__popcnt64:
3635 case Builtin::BI__builtin_popcount:
3636 case Builtin::BI__builtin_popcountl:
3637 case Builtin::BI__builtin_popcountll:
3638 case Builtin::BI__builtin_popcountg: {
3639 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3640
3641 llvm::Type *ArgType = ArgValue->getType();
3642 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3643
3644 llvm::Type *ResultType = ConvertType(E->getType());
3645 Value *Result = Builder.CreateCall(F, ArgValue);
3646 if (Result->getType() != ResultType)
3647 Result =
3648 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3649 return RValue::get(Result);
3650 }
3651 case Builtin::BI__builtin_unpredictable: {
3652 // Always return the argument of __builtin_unpredictable. LLVM does not
3653 // handle this builtin. Metadata for this builtin should be added directly
3654 // to instructions such as branches or switches that use it.
3655 return RValue::get(EmitScalarExpr(E->getArg(0)));
3656 }
3657 case Builtin::BI__builtin_expect: {
3658 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3659 llvm::Type *ArgType = ArgValue->getType();
3660
3661 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3662 // Don't generate llvm.expect on -O0 as the backend won't use it for
3663 // anything.
3664 // Note, we still IRGen ExpectedValue because it could have side-effects.
3665 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3666 return RValue::get(ArgValue);
3667
3668 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3669 Value *Result =
3670 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3671 return RValue::get(Result);
3672 }
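// For example, 'if (__builtin_expect(err, 0))' becomes a call to
// llvm.expect.i64 (the argument type is 'long', i.e. i64 on LP64 targets) when
// optimizing; at -O0 only the evaluated argument is returned.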
3673 case Builtin::BI__builtin_expect_with_probability: {
3674 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3675 llvm::Type *ArgType = ArgValue->getType();
3676
3677 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3678 llvm::APFloat Probability(0.0);
3679 const Expr *ProbArg = E->getArg(2);
3680 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3681 assert(EvalSucceed && "probability should be able to evaluate as float");
3682 (void)EvalSucceed;
3683 bool LoseInfo = false;
3684 Probability.convert(llvm::APFloat::IEEEdouble(),
3685 llvm::RoundingMode::Dynamic, &LoseInfo);
3686 llvm::Type *Ty = ConvertType(ProbArg->getType());
3687 Constant *Confidence = ConstantFP::get(Ty, Probability);
3688 // Don't generate llvm.expect.with.probability on -O0 as the backend
3689 // won't use it for anything.
3690 // Note, we still IRGen ExpectedValue because it could have side-effects.
3691 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3692 return RValue::get(ArgValue);
3693
3694 Function *FnExpect =
3695 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3696 Value *Result = Builder.CreateCall(
3697 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3698 return RValue::get(Result);
3699 }
3700 case Builtin::BI__builtin_assume_aligned: {
3701 const Expr *Ptr = E->getArg(0);
3702 Value *PtrValue = EmitScalarExpr(Ptr);
3703 Value *OffsetValue =
3704 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3705
3706 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3707 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3708 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3709 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3710 llvm::Value::MaximumAlignment);
3711
3712 emitAlignmentAssumption(PtrValue, Ptr,
3713 /*The expr loc is sufficient.*/ SourceLocation(),
3714 AlignmentCI, OffsetValue);
3715 return RValue::get(PtrValue);
3716 }
3717 case Builtin::BI__assume:
3718 case Builtin::BI__builtin_assume: {
3719 if (E->getArg(0)->HasSideEffects(getContext()))
3720 return RValue::get(nullptr);
3721
3722 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3723 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3724 Builder.CreateCall(FnAssume, ArgValue);
3725 return RValue::get(nullptr);
3726 }
3727 case Builtin::BI__builtin_assume_separate_storage: {
3728 const Expr *Arg0 = E->getArg(0);
3729 const Expr *Arg1 = E->getArg(1);
3730
3731 Value *Value0 = EmitScalarExpr(Arg0);
3732 Value *Value1 = EmitScalarExpr(Arg1);
3733
3734 Value *Values[] = {Value0, Value1};
3735 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3736 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3737 return RValue::get(nullptr);
3738 }
3739 case Builtin::BI__builtin_allow_runtime_check: {
3740 StringRef Kind =
3741 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3742 LLVMContext &Ctx = CGM.getLLVMContext();
3743 llvm::Value *Allow = Builder.CreateCall(
3744 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3745 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3746 return RValue::get(Allow);
3747 }
3748 case Builtin::BI__arithmetic_fence: {
3749 // Create the builtin call if fast math is enabled and the target
3750 // supports the builtin; otherwise just return the argument.
3751 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3752 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3753 bool isArithmeticFenceEnabled =
3754 FMF.allowReassoc() &&
3755 getContext().getTargetInfo().checkArithmeticFenceSupported();
3756 QualType ArgType = E->getArg(0)->getType();
3757 if (ArgType->isComplexType()) {
3758 if (isArithmeticFenceEnabled) {
3759 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3760 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3761 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3762 ConvertType(ElementType));
3763 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3764 ConvertType(ElementType));
3765 return RValue::getComplex(std::make_pair(Real, Imag));
3766 }
3767 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3768 Value *Real = ComplexVal.first;
3769 Value *Imag = ComplexVal.second;
3770 return RValue::getComplex(std::make_pair(Real, Imag));
3771 }
3772 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3773 if (isArithmeticFenceEnabled)
3774 return RValue::get(
3775 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3776 return RValue::get(ArgValue);
3777 }
3778 case Builtin::BI__builtin_bswap16:
3779 case Builtin::BI__builtin_bswap32:
3780 case Builtin::BI__builtin_bswap64:
3781 case Builtin::BI_byteswap_ushort:
3782 case Builtin::BI_byteswap_ulong:
3783 case Builtin::BI_byteswap_uint64: {
3784 return RValue::get(
3785 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3786 }
3787 case Builtin::BI__builtin_bitreverse8:
3788 case Builtin::BI__builtin_bitreverse16:
3789 case Builtin::BI__builtin_bitreverse32:
3790 case Builtin::BI__builtin_bitreverse64: {
3791 return RValue::get(
3792 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3793 }
3794 case Builtin::BI__builtin_rotateleft8:
3795 case Builtin::BI__builtin_rotateleft16:
3796 case Builtin::BI__builtin_rotateleft32:
3797 case Builtin::BI__builtin_rotateleft64:
3798 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3799 case Builtin::BI_rotl16:
3800 case Builtin::BI_rotl:
3801 case Builtin::BI_lrotl:
3802 case Builtin::BI_rotl64:
3803 return emitRotate(E, false);
3804
3805 case Builtin::BI__builtin_rotateright8:
3806 case Builtin::BI__builtin_rotateright16:
3807 case Builtin::BI__builtin_rotateright32:
3808 case Builtin::BI__builtin_rotateright64:
3809 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3810 case Builtin::BI_rotr16:
3811 case Builtin::BI_rotr:
3812 case Builtin::BI_lrotr:
3813 case Builtin::BI_rotr64:
3814 return emitRotate(E, true);
3815
3816 case Builtin::BI__builtin_constant_p: {
3817 llvm::Type *ResultType = ConvertType(E->getType());
3818
3819 const Expr *Arg = E->getArg(0);
3820 QualType ArgType = Arg->getType();
3821 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3822 // and likely a mistake.
3823 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3824 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3825 // Per the GCC documentation, only numeric constants are recognized after
3826 // inlining.
3827 return RValue::get(ConstantInt::get(ResultType, 0));
3828
3829 if (Arg->HasSideEffects(getContext()))
3830 // The argument is unevaluated, so be conservative if it might have
3831 // side-effects.
3832 return RValue::get(ConstantInt::get(ResultType, 0));
3833
3834 Value *ArgValue = EmitScalarExpr(Arg);
3835 if (ArgType->isObjCObjectPointerType()) {
3836 // Convert Objective-C objects to id because we cannot distinguish between
3837 // LLVM types for Obj-C classes as they are opaque.
3838 ArgType = CGM.getContext().getObjCIdType();
3839 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3840 }
3841 Function *F =
3842 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3843 Value *Result = Builder.CreateCall(F, ArgValue);
3844 if (Result->getType() != ResultType)
3845 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3846 return RValue::get(Result);
3847 }
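// For example, '__builtin_constant_p(42)' lowers to llvm.is.constant, which the
// optimizer later folds to 1; arguments that never become constant fold to 0.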
3848 case Builtin::BI__builtin_dynamic_object_size:
3849 case Builtin::BI__builtin_object_size: {
3850 unsigned Type =
3851 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3852 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3853
3854 // We pass this builtin onto the optimizer so that it can figure out the
3855 // object size in more complex cases.
3856 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3857 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3858 /*EmittedE=*/nullptr, IsDynamic));
3859 }
3860 case Builtin::BI__builtin_counted_by_ref: {
3861 // Default to returning '(void *) 0'.
3862 llvm::Value *Result = llvm::ConstantPointerNull::get(
3863 llvm::PointerType::getUnqual(getLLVMContext()));
3864
3865 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3866
3867 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3868 UO && UO->getOpcode() == UO_AddrOf) {
3869 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3870
3871 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3872 Arg = ASE->getBase()->IgnoreParenImpCasts();
3873 }
3874
3875 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3876 if (auto *CATy =
3877 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3878 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3879 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3880 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3881 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3882 else
3883 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3884 }
3885 }
3886
3887 return RValue::get(Result);
3888 }
3889 case Builtin::BI__builtin_prefetch: {
3890 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3891 // FIXME: Technically these constants should be of type 'int', yes?
3892 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3893 llvm::ConstantInt::get(Int32Ty, 0);
3894 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3895 llvm::ConstantInt::get(Int32Ty, 3);
3896 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3897 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3898 Builder.CreateCall(F, {Address, RW, Locality, Data});
3899 return RValue::get(nullptr);
3900 }
3901 case Builtin::BI__builtin_readcyclecounter: {
3902 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3903 return RValue::get(Builder.CreateCall(F));
3904 }
3905 case Builtin::BI__builtin_readsteadycounter: {
3906 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3907 return RValue::get(Builder.CreateCall(F));
3908 }
3909 case Builtin::BI__builtin___clear_cache: {
3910 Value *Begin = EmitScalarExpr(E->getArg(0));
3911 Value *End = EmitScalarExpr(E->getArg(1));
3912 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3913 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3914 }
3915 case Builtin::BI__builtin_trap:
3916 EmitTrapCall(Intrinsic::trap);
3917 return RValue::get(nullptr);
3918 case Builtin::BI__builtin_verbose_trap: {
3919 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3920 if (getDebugInfo()) {
3921 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3922 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3923 *E->getArg(1)->tryEvaluateString(getContext()));
3924 }
3925 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3926 // Currently no attempt is made to prevent traps from being merged.
3927 EmitTrapCall(Intrinsic::trap);
3928 return RValue::get(nullptr);
3929 }
3930 case Builtin::BI__debugbreak:
3931 EmitTrapCall(Intrinsic::debugtrap);
3932 return RValue::get(nullptr);
3933 case Builtin::BI__builtin_unreachable: {
3934 EmitUnreachable(E->getExprLoc());
3935
3936 // We do need to preserve an insertion point.
3937 EmitBlock(createBasicBlock("unreachable.cont"));
3938
3939 return RValue::get(nullptr);
3940 }
3941
3942 case Builtin::BI__builtin_powi:
3943 case Builtin::BI__builtin_powif:
3944 case Builtin::BI__builtin_powil: {
3945 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3946 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3947
3948 if (Builder.getIsFPConstrained()) {
3949 // FIXME: llvm.powi has 2 mangling types,
3950 // llvm.experimental.constrained.powi has one.
3951 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3952 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3953 Src0->getType());
3954 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3955 }
3956
3957 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3958 { Src0->getType(), Src1->getType() });
3959 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3960 }
3961 case Builtin::BI__builtin_frexpl: {
3962 // Linux PPC will not be adding additional PPCDoubleDouble support.
3963 // WIP to switch default to IEEE long double. Will emit libcall for
3964 // frexpl instead of legalizing this type in the BE.
3965 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3966 break;
3967 [[fallthrough]];
3968 }
3969 case Builtin::BI__builtin_frexp:
3970 case Builtin::BI__builtin_frexpf:
3971 case Builtin::BI__builtin_frexpf128:
3972 case Builtin::BI__builtin_frexpf16:
3973 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3974 case Builtin::BI__builtin_isgreater:
3975 case Builtin::BI__builtin_isgreaterequal:
3976 case Builtin::BI__builtin_isless:
3977 case Builtin::BI__builtin_islessequal:
3978 case Builtin::BI__builtin_islessgreater:
3979 case Builtin::BI__builtin_isunordered: {
3980 // Ordered comparisons: we know the arguments to these are matching scalar
3981 // floating point values.
3982 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3983 Value *LHS = EmitScalarExpr(E->getArg(0));
3984 Value *RHS = EmitScalarExpr(E->getArg(1));
3985
3986 switch (BuiltinID) {
3987 default: llvm_unreachable("Unknown ordered comparison");
3988 case Builtin::BI__builtin_isgreater:
3989 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3990 break;
3991 case Builtin::BI__builtin_isgreaterequal:
3992 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3993 break;
3994 case Builtin::BI__builtin_isless:
3995 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3996 break;
3997 case Builtin::BI__builtin_islessequal:
3998 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3999 break;
4000 case Builtin::BI__builtin_islessgreater:
4001 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
4002 break;
4003 case Builtin::BI__builtin_isunordered:
4004 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
4005 break;
4006 }
4007 // ZExt bool to int type.
4008 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
4009 }
4010
4011 case Builtin::BI__builtin_isnan: {
4012 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4013 Value *V = EmitScalarExpr(E->getArg(0));
4014 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4015 return RValue::get(Result);
4016 return RValue::get(
4017 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
4018 ConvertType(E->getType())));
4019 }
4020
4021 case Builtin::BI__builtin_issignaling: {
4022 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4023 Value *V = EmitScalarExpr(E->getArg(0));
4024 return RValue::get(
4025 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
4026 ConvertType(E->getType())));
4027 }
4028
4029 case Builtin::BI__builtin_isinf: {
4030 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4031 Value *V = EmitScalarExpr(E->getArg(0));
4032 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4033 return RValue::get(Result);
4034 return RValue::get(
4035 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4036 ConvertType(E->getType())));
4037 }
4038
4039 case Builtin::BIfinite:
4040 case Builtin::BI__finite:
4041 case Builtin::BIfinitef:
4042 case Builtin::BI__finitef:
4043 case Builtin::BIfinitel:
4044 case Builtin::BI__finitel:
4045 case Builtin::BI__builtin_isfinite: {
4046 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4047 Value *V = EmitScalarExpr(E->getArg(0));
4048 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4049 return RValue::get(Result);
4050 return RValue::get(
4051 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4052 ConvertType(E->getType())));
4053 }
4054
4055 case Builtin::BI__builtin_isnormal: {
4056 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4057 Value *V = EmitScalarExpr(E->getArg(0));
4058 return RValue::get(
4059 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4060 ConvertType(E->getType())));
4061 }
4062
4063 case Builtin::BI__builtin_issubnormal: {
4064 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4065 Value *V = EmitScalarExpr(E->getArg(0));
4066 return RValue::get(
4067 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4068 ConvertType(E->getType())));
4069 }
4070
4071 case Builtin::BI__builtin_iszero: {
4072 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4073 Value *V = EmitScalarExpr(E->getArg(0));
4074 return RValue::get(
4075 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4076 ConvertType(E->getType())));
4077 }
4078
4079 case Builtin::BI__builtin_isfpclass: {
4080 Expr::EvalResult Result;
4081 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4082 break;
4083 uint64_t Test = Result.Val.getInt().getLimitedValue();
4084 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4085 Value *V = EmitScalarExpr(E->getArg(0));
4086 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4087 ConvertType(E->getType())));
4088 }
4089
4090 case Builtin::BI__builtin_nondeterministic_value: {
4091 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4092
4093 Value *Result = PoisonValue::get(Ty);
4094 Result = Builder.CreateFreeze(Result);
4095
4096 return RValue::get(Result);
4097 }
4098
4099 case Builtin::BI__builtin_elementwise_abs: {
4100 Value *Result;
4101 QualType QT = E->getArg(0)->getType();
4102
4103 if (auto *VecTy = QT->getAs<VectorType>())
4104 QT = VecTy->getElementType();
4105 if (QT->isIntegerType())
4106 Result = Builder.CreateBinaryIntrinsic(
4107 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4108 Builder.getFalse(), nullptr, "elt.abs");
4109 else
4110 Result = emitBuiltinWithOneOverloadedType<1>(
4111 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4112
4113 return RValue::get(Result);
4114 }
4115 case Builtin::BI__builtin_elementwise_acos:
4116 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4117 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4118 case Builtin::BI__builtin_elementwise_asin:
4119 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4120 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4121 case Builtin::BI__builtin_elementwise_atan:
4122 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4123 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4124 case Builtin::BI__builtin_elementwise_atan2:
4125 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4126 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4127 case Builtin::BI__builtin_elementwise_ceil:
4128 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4129 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4130 case Builtin::BI__builtin_elementwise_exp:
4131 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4132 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4133 case Builtin::BI__builtin_elementwise_exp2:
4134 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4135 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4136 case Builtin::BI__builtin_elementwise_log:
4137 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4138 *this, E, llvm::Intrinsic::log, "elt.log"));
4139 case Builtin::BI__builtin_elementwise_log2:
4140 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4141 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4142 case Builtin::BI__builtin_elementwise_log10:
4143 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4144 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4145 case Builtin::BI__builtin_elementwise_pow: {
4146 return RValue::get(
4147 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4148 }
4149 case Builtin::BI__builtin_elementwise_bitreverse:
4150 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4151 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4152 case Builtin::BI__builtin_elementwise_cos:
4153 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4154 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4155 case Builtin::BI__builtin_elementwise_cosh:
4156 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4157 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4158 case Builtin::BI__builtin_elementwise_floor:
4159 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4160 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4161 case Builtin::BI__builtin_elementwise_popcount:
4162 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4163 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4164 case Builtin::BI__builtin_elementwise_roundeven:
4165 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4166 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4167 case Builtin::BI__builtin_elementwise_round:
4168 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4169 *this, E, llvm::Intrinsic::round, "elt.round"));
4170 case Builtin::BI__builtin_elementwise_rint:
4171 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4172 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4173 case Builtin::BI__builtin_elementwise_nearbyint:
4174 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4175 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4176 case Builtin::BI__builtin_elementwise_sin:
4177 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4178 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4179 case Builtin::BI__builtin_elementwise_sinh:
4180 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4181 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4182 case Builtin::BI__builtin_elementwise_tan:
4183 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4184 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4185 case Builtin::BI__builtin_elementwise_tanh:
4186 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4187 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4188 case Builtin::BI__builtin_elementwise_trunc:
4189 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4190 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4191 case Builtin::BI__builtin_elementwise_canonicalize:
4192 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4193 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4194 case Builtin::BI__builtin_elementwise_copysign:
4195 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4196 *this, E, llvm::Intrinsic::copysign));
4197 case Builtin::BI__builtin_elementwise_fma:
4198 return RValue::get(
4199 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4200 case Builtin::BI__builtin_elementwise_add_sat:
4201 case Builtin::BI__builtin_elementwise_sub_sat: {
4202 Value *Op0 = EmitScalarExpr(E->getArg(0));
4203 Value *Op1 = EmitScalarExpr(E->getArg(1));
4204 Value *Result;
4205 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4206 QualType Ty = E->getArg(0)->getType();
4207 if (auto *VecTy = Ty->getAs<VectorType>())
4208 Ty = VecTy->getElementType();
4209 bool IsSigned = Ty->isSignedIntegerType();
4210 unsigned Opc;
4211 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4212 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4213 else
4214 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4215 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4216 return RValue::get(Result);
4217 }
4218
4219 case Builtin::BI__builtin_elementwise_max: {
4220 Value *Op0 = EmitScalarExpr(E->getArg(0));
4221 Value *Op1 = EmitScalarExpr(E->getArg(1));
4222 Value *Result;
4223 if (Op0->getType()->isIntOrIntVectorTy()) {
4224 QualType Ty = E->getArg(0)->getType();
4225 if (auto *VecTy = Ty->getAs<VectorType>())
4226 Ty = VecTy->getElementType();
4227 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4228 ? llvm::Intrinsic::smax
4229 : llvm::Intrinsic::umax,
4230 Op0, Op1, nullptr, "elt.max");
4231 } else
4232 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4233 return RValue::get(Result);
4234 }
4235 case Builtin::BI__builtin_elementwise_min: {
4236 Value *Op0 = EmitScalarExpr(E->getArg(0));
4237 Value *Op1 = EmitScalarExpr(E->getArg(1));
4238 Value *Result;
4239 if (Op0->getType()->isIntOrIntVectorTy()) {
4240 QualType Ty = E->getArg(0)->getType();
4241 if (auto *VecTy = Ty->getAs<VectorType>())
4242 Ty = VecTy->getElementType();
4243 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4244 ? llvm::Intrinsic::smin
4245 : llvm::Intrinsic::umin,
4246 Op0, Op1, nullptr, "elt.min");
4247 } else
4248 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4249 return RValue::get(Result);
4250 }
4251
4252 case Builtin::BI__builtin_elementwise_maximum: {
4253 Value *Op0 = EmitScalarExpr(E->getArg(0));
4254 Value *Op1 = EmitScalarExpr(E->getArg(1));
4255 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4256 Op1, nullptr, "elt.maximum");
4257 return RValue::get(Result);
4258 }
4259
4260 case Builtin::BI__builtin_elementwise_minimum: {
4261 Value *Op0 = EmitScalarExpr(E->getArg(0));
4262 Value *Op1 = EmitScalarExpr(E->getArg(1));
4263 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4264 Op1, nullptr, "elt.minimum");
4265 return RValue::get(Result);
4266 }
4267
4268 case Builtin::BI__builtin_reduce_max: {
4269 auto GetIntrinsicID = [this](QualType QT) {
4270 if (auto *VecTy = QT->getAs<VectorType>())
4271 QT = VecTy->getElementType();
4272 else if (QT->isSizelessVectorType())
4274
4275 if (QT->isSignedIntegerType())
4276 return llvm::Intrinsic::vector_reduce_smax;
4277 if (QT->isUnsignedIntegerType())
4278 return llvm::Intrinsic::vector_reduce_umax;
4279 assert(QT->isFloatingType() && "must have a float here");
4280 return llvm::Intrinsic::vector_reduce_fmax;
4281 };
4282 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4283 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.max"));
4284 }
4285
4286 case Builtin::BI__builtin_reduce_min: {
4287 auto GetIntrinsicID = [this](QualType QT) {
4288 if (auto *VecTy = QT->getAs<VectorType>())
4289 QT = VecTy->getElementType();
4290 else if (QT->isSizelessVectorType())
4292
4293 if (QT->isSignedIntegerType())
4294 return llvm::Intrinsic::vector_reduce_smin;
4295 if (QT->isUnsignedIntegerType())
4296 return llvm::Intrinsic::vector_reduce_umin;
4297 assert(QT->isFloatingType() && "must have a float here");
4298 return llvm::Intrinsic::vector_reduce_fmin;
4299 };
4300
4301 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4302 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4303 }
4304
4305 case Builtin::BI__builtin_reduce_add:
4306 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4307 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4308 case Builtin::BI__builtin_reduce_mul:
4309 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4310 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4311 case Builtin::BI__builtin_reduce_xor:
4312 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4313 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4314 case Builtin::BI__builtin_reduce_or:
4315 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4316 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4317 case Builtin::BI__builtin_reduce_and:
4318 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4319 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4320 case Builtin::BI__builtin_reduce_maximum:
4321 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4322 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4323 case Builtin::BI__builtin_reduce_minimum:
4324 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4325 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4326
4327 case Builtin::BI__builtin_matrix_transpose: {
4328 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4329 Value *MatValue = EmitScalarExpr(E->getArg(0));
4330 MatrixBuilder MB(Builder);
4331 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4332 MatrixTy->getNumColumns());
4333 return RValue::get(Result);
4334 }
4335
4336 case Builtin::BI__builtin_matrix_column_major_load: {
4337 MatrixBuilder MB(Builder);
4338 // Emit everything that isn't dependent on the first parameter type
4339 Value *Stride = EmitScalarExpr(E->getArg(3));
4340 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4341 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4342 assert(PtrTy && "arg0 must be of pointer type");
4343 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4344
4345 Address Src = EmitPointerWithAlignment(E->getArg(0));
4346 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4347 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4348 0);
4349 Value *Result = MB.CreateColumnMajorLoad(
4350 Src.getElementType(), Src.emitRawPointer(*this),
4351 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4352 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4353 return RValue::get(Result);
4354 }
4355
4356 case Builtin::BI__builtin_matrix_column_major_store: {
4357 MatrixBuilder MB(Builder);
4358 Value *Matrix = EmitScalarExpr(E->getArg(0));
4359 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4360 Value *Stride = EmitScalarExpr(E->getArg(2));
4361
4362 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4363 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4364 assert(PtrTy && "arg1 must be of pointer type");
4365 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4366
4367 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4368 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4369 0);
4370 Value *Result = MB.CreateColumnMajorStore(
4371 Matrix, Dst.emitRawPointer(*this),
4372 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4373 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4374 return RValue::get(Result);
4375 }
4376
4377 case Builtin::BI__builtin_isinf_sign: {
4378 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4379 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4380 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4381 Value *Arg = EmitScalarExpr(E->getArg(0));
4382 Value *AbsArg = EmitFAbs(*this, Arg);
4383 Value *IsInf = Builder.CreateFCmpOEQ(
4384 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4385 Value *IsNeg = EmitSignBit(*this, Arg);
4386
4387 llvm::Type *IntTy = ConvertType(E->getType());
4388 Value *Zero = Constant::getNullValue(IntTy);
4389 Value *One = ConstantInt::get(IntTy, 1);
4390 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4391 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4392 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4393 return RValue::get(Result);
4394 }
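// For example, __builtin_isinf_sign(-INFINITY) evaluates to -1, while
// __builtin_isinf_sign(1.0) evaluates to 0.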
4395
4396 case Builtin::BI__builtin_flt_rounds: {
4397 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4398
4399 llvm::Type *ResultType = ConvertType(E->getType());
4400 Value *Result = Builder.CreateCall(F);
4401 if (Result->getType() != ResultType)
4402 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4403 "cast");
4404 return RValue::get(Result);
4405 }
4406
4407 case Builtin::BI__builtin_set_flt_rounds: {
4408 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4409
4410 Value *V = EmitScalarExpr(E->getArg(0));
4411 Builder.CreateCall(F, V);
4412 return RValue::get(nullptr);
4413 }
4414
4415 case Builtin::BI__builtin_fpclassify: {
4416 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4417 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4418 Value *V = EmitScalarExpr(E->getArg(5));
4419 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4420
4421 // Create Result
4422 BasicBlock *Begin = Builder.GetInsertBlock();
4423 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4424 Builder.SetInsertPoint(End);
4425 PHINode *Result =
4426 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4427 "fpclassify_result");
4428
4429 // if (V==0) return FP_ZERO
4430 Builder.SetInsertPoint(Begin);
4431 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4432 "iszero");
4433 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4434 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4435 Builder.CreateCondBr(IsZero, End, NotZero);
4436 Result->addIncoming(ZeroLiteral, Begin);
4437
4438 // if (V != V) return FP_NAN
4439 Builder.SetInsertPoint(NotZero);
4440 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4441 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4442 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4443 Builder.CreateCondBr(IsNan, End, NotNan);
4444 Result->addIncoming(NanLiteral, NotZero);
4445
4446 // if (fabs(V) == infinity) return FP_INFINITY
4447 Builder.SetInsertPoint(NotNan);
4448 Value *VAbs = EmitFAbs(*this, V);
4449 Value *IsInf =
4450 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4451 "isinf");
4452 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4453 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4454 Builder.CreateCondBr(IsInf, End, NotInf);
4455 Result->addIncoming(InfLiteral, NotNan);
4456
4457 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4458 Builder.SetInsertPoint(NotInf);
4459 APFloat Smallest = APFloat::getSmallestNormalized(
4460 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4461 Value *IsNormal =
4462 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4463 "isnormal");
4464 Value *NormalResult =
4465 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4466 EmitScalarExpr(E->getArg(3)));
4467 Builder.CreateBr(End);
4468 Result->addIncoming(NormalResult, NotInf);
4469
4470 // return Result
4471 Builder.SetInsertPoint(End);
4472 return RValue::get(Result);
4473 }
4474
4475 // An alloca will always return a pointer to the alloca (stack) address
4476 // space. This address space need not be the same as the AST / Language
4477 // default (e.g. in C / C++ auto vars are in the generic address space). At
4478 // the AST level this is handled within CreateTempAlloca et al., but for the
4479 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4480 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
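// For example, on AMDGPU an alloca lives in the private address space
// (addrspace(5)) while the builtin's result is a pointer in the generic
// address space, so an address space cast is emitted below before returning
// the value.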
4481 case Builtin::BIalloca:
4482 case Builtin::BI_alloca:
4483 case Builtin::BI__builtin_alloca_uninitialized:
4484 case Builtin::BI__builtin_alloca: {
4485 Value *Size = EmitScalarExpr(E->getArg(0));
4486 const TargetInfo &TI = getContext().getTargetInfo();
4487 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4488 const Align SuitableAlignmentInBytes =
4489 CGM.getContext()
4490 .toCharUnitsFromBits(TI.getSuitableAlign())
4491 .getAsAlign();
4492 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4493 AI->setAlignment(SuitableAlignmentInBytes);
4494 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4495 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4496 LangAS AAS = getASTAllocaAddressSpace();
4497 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4498 if (AAS != EAS) {
4499 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4500 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4501 EAS, Ty));
4502 }
4503 return RValue::get(AI);
4504 }
4505
4506 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4507 case Builtin::BI__builtin_alloca_with_align: {
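// The alignment argument of these builtins is expressed in bits and must be
// an integer constant expression, so it can simply be read out of the
// ConstantInt below and converted to a byte alignment.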
4508 Value *Size = EmitScalarExpr(E->getArg(0));
4509 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4510 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4511 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4512 const Align AlignmentInBytes =
4513 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4514 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4515 AI->setAlignment(AlignmentInBytes);
4516 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4517 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4518 LangAS AAS = getASTAllocaAddressSpace();
4519 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4520 if (AAS != EAS) {
4521 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4522 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4523 EAS, Ty));
4524 }
4525 return RValue::get(AI);
4526 }
4527
4528 case Builtin::BIbzero:
4529 case Builtin::BI__builtin_bzero: {
4530 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4531 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4532 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4533 E->getArg(0)->getExprLoc(), FD, 0);
4534 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4535 return RValue::get(nullptr);
4536 }
4537
4538 case Builtin::BIbcopy:
4539 case Builtin::BI__builtin_bcopy: {
4540 Address Src = EmitPointerWithAlignment(E->getArg(0));
4541 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4542 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4543 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4544 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4545 0);
4546 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4547 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4548 0);
4549 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4550 return RValue::get(nullptr);
4551 }
4552
4553 case Builtin::BImemcpy:
4554 case Builtin::BI__builtin_memcpy:
4555 case Builtin::BImempcpy:
4556 case Builtin::BI__builtin_mempcpy: {
4557 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4558 Address Src = EmitPointerWithAlignment(E->getArg(1));
4559 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4560 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4561 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4562 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
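// mempcpy differs from memcpy only in its return value: it yields a pointer
// just past the last byte written (dest + size) instead of dest, hence the
// GEP in the mempcpy path below.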
4563 if (BuiltinID == Builtin::BImempcpy ||
4564 BuiltinID == Builtin::BI__builtin_mempcpy)
4565 return RValue::get(Builder.CreateInBoundsGEP(
4566 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4567 else
4568 return RValue::get(Dest, *this);
4569 }
4570
4571 case Builtin::BI__builtin_memcpy_inline: {
4572 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4573 Address Src = EmitPointerWithAlignment(E->getArg(1));
4574 uint64_t Size =
4575 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4576 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4577 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4578 Builder.CreateMemCpyInline(Dest, Src, Size);
4579 return RValue::get(nullptr);
4580 }
4581
4582 case Builtin::BI__builtin_char_memchr:
4583 BuiltinID = Builtin::BI__builtin_memchr;
4584 break;
4585
4586 case Builtin::BI__builtin___memcpy_chk: {
4587 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4588 Expr::EvalResult SizeResult, DstSizeResult;
4589 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4590 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4591 break;
4592 llvm::APSInt Size = SizeResult.Val.getInt();
4593 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4594 if (Size.ugt(DstSize))
4595 break;
4596 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4597 Address Src = EmitPointerWithAlignment(E->getArg(1));
4598 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4599 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4600 return RValue::get(Dest, *this);
4601 }
4602
4603 case Builtin::BI__builtin_objc_memmove_collectable: {
4604 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4605 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4606 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4607 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4608 DestAddr, SrcAddr, SizeVal);
4609 return RValue::get(DestAddr, *this);
4610 }
4611
4612 case Builtin::BI__builtin___memmove_chk: {
4613 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4614 Expr::EvalResult SizeResult, DstSizeResult;
4615 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4616 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4617 break;
4618 llvm::APSInt Size = SizeResult.Val.getInt();
4619 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4620 if (Size.ugt(DstSize))
4621 break;
4622 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4623 Address Src = EmitPointerWithAlignment(E->getArg(1));
4624 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4625 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4626 return RValue::get(Dest, *this);
4627 }
4628
4629 case Builtin::BImemmove:
4630 case Builtin::BI__builtin_memmove: {
4631 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4632 Address Src = EmitPointerWithAlignment(E->getArg(1));
4633 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4634 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4635 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4636 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4637 return RValue::get(Dest, *this);
4638 }
4639 case Builtin::BImemset:
4640 case Builtin::BI__builtin_memset: {
4641 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4642 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4643 Builder.getInt8Ty());
4644 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4645 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4646 E->getArg(0)->getExprLoc(), FD, 0);
4647 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4648 return RValue::get(Dest, *this);
4649 }
4650 case Builtin::BI__builtin_memset_inline: {
4651 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4652 Value *ByteVal =
4653 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4654 uint64_t Size =
4655 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4656 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4657 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4658 0);
4659 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4660 return RValue::get(nullptr);
4661 }
4662 case Builtin::BI__builtin___memset_chk: {
4663 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4664 Expr::EvalResult SizeResult, DstSizeResult;
4665 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4666 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4667 break;
4668 llvm::APSInt Size = SizeResult.Val.getInt();
4669 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4670 if (Size.ugt(DstSize))
4671 break;
4672 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4673 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4674 Builder.getInt8Ty());
4675 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4676 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4677 return RValue::get(Dest, *this);
4678 }
4679 case Builtin::BI__builtin_wmemchr: {
4680 // The MSVC runtime library does not provide a definition of wmemchr, so we
4681 // need an inline implementation.
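// The expansion below is a straightforward element-at-a-time loop: compare
// each wchar_t against the needle, return its address on a match, and return
// a null pointer once the remaining count reaches zero.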
4682 if (!getTarget().getTriple().isOSMSVCRT())
4683 break;
4684
4685 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4686 Value *Str = EmitScalarExpr(E->getArg(0));
4687 Value *Chr = EmitScalarExpr(E->getArg(1));
4688 Value *Size = EmitScalarExpr(E->getArg(2));
4689
4690 BasicBlock *Entry = Builder.GetInsertBlock();
4691 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4692 BasicBlock *Next = createBasicBlock("wmemchr.next");
4693 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4694 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4695 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4696
4697 EmitBlock(CmpEq);
4698 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4699 StrPhi->addIncoming(Str, Entry);
4700 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4701 SizePhi->addIncoming(Size, Entry);
4702 CharUnits WCharAlign =
4703 getContext().getTypeAlignInChars(getContext().WCharTy);
4704 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4705 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4706 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4707 Builder.CreateCondBr(StrEqChr, Exit, Next);
4708
4709 EmitBlock(Next);
4710 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4711 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4712 Value *NextSizeEq0 =
4713 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4714 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4715 StrPhi->addIncoming(NextStr, Next);
4716 SizePhi->addIncoming(NextSize, Next);
4717
4718 EmitBlock(Exit);
4719 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4720 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4721 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4722 Ret->addIncoming(FoundChr, CmpEq);
4723 return RValue::get(Ret);
4724 }
4725 case Builtin::BI__builtin_wmemcmp: {
4726 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4727 // need an inline implementation.
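// The expansion mirrors the wmemchr lowering above: walk both buffers one
// wchar_t at a time (compared as unsigned values) and return 1, -1, or 0 for
// the first element that is greater, less, or when the count is exhausted.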
4728 if (!getTarget().getTriple().isOSMSVCRT())
4729 break;
4730
4731 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4732
4733 Value *Dst = EmitScalarExpr(E->getArg(0));
4734 Value *Src = EmitScalarExpr(E->getArg(1));
4735 Value *Size = EmitScalarExpr(E->getArg(2));
4736
4737 BasicBlock *Entry = Builder.GetInsertBlock();
4738 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4739 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4740 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4741 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4742 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4743 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4744
4745 EmitBlock(CmpGT);
4746 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4747 DstPhi->addIncoming(Dst, Entry);
4748 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4749 SrcPhi->addIncoming(Src, Entry);
4750 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4751 SizePhi->addIncoming(Size, Entry);
4752 CharUnits WCharAlign =
4753 getContext().getTypeAlignInChars(getContext().WCharTy);
4754 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4755 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4756 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4757 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4758
4759 EmitBlock(CmpLT);
4760 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4761 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4762
4763 EmitBlock(Next);
4764 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4765 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4766 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4767 Value *NextSizeEq0 =
4768 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4769 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4770 DstPhi->addIncoming(NextDst, Next);
4771 SrcPhi->addIncoming(NextSrc, Next);
4772 SizePhi->addIncoming(NextSize, Next);
4773
4774 EmitBlock(Exit);
4775 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4776 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4777 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4778 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4779 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4780 return RValue::get(Ret);
4781 }
4782 case Builtin::BI__builtin_dwarf_cfa: {
4783 // The offset in bytes from the first argument to the CFA.
4784 //
4785 // Why on earth is this in the frontend? Is there any reason at
4786 // all that the backend can't reasonably determine this while
4787 // lowering llvm.eh.dwarf.cfa()?
4788 //
4789 // TODO: If there's a satisfactory reason, add a target hook for
4790 // this instead of hard-coding 0, which is correct for most targets.
4791 int32_t Offset = 0;
4792
4793 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4794 return RValue::get(Builder.CreateCall(F,
4795 llvm::ConstantInt::get(Int32Ty, Offset)));
4796 }
4797 case Builtin::BI__builtin_return_address: {
4798 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4799 getContext().UnsignedIntTy);
4800 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4801 return RValue::get(Builder.CreateCall(F, Depth));
4802 }
4803 case Builtin::BI_ReturnAddress: {
4804 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4805 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4806 }
4807 case Builtin::BI__builtin_frame_address: {
4808 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4809 getContext().UnsignedIntTy);
4810 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4811 return RValue::get(Builder.CreateCall(F, Depth));
4812 }
4813 case Builtin::BI__builtin_extract_return_addr: {
4814 Value *Address = EmitScalarExpr(E->getArg(0));
4815 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4816 return RValue::get(Result);
4817 }
4818 case Builtin::BI__builtin_frob_return_addr: {
4819 Value *Address = EmitScalarExpr(E->getArg(0));
4820 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4821 return RValue::get(Result);
4822 }
4823 case Builtin::BI__builtin_dwarf_sp_column: {
4824 llvm::IntegerType *Ty
4825 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4826 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4827 if (Column == -1) {
4828 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4829 return RValue::get(llvm::UndefValue::get(Ty));
4830 }
4831 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4832 }
4833 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4834 Value *Address = EmitScalarExpr(E->getArg(0));
4835 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4836 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4837 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4838 }
4839 case Builtin::BI__builtin_eh_return: {
4840 Value *Int = EmitScalarExpr(E->getArg(0));
4841 Value *Ptr = EmitScalarExpr(E->getArg(1));
4842
4843 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4844 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4845 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4846 Function *F =
4847 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4848 : Intrinsic::eh_return_i64);
4849 Builder.CreateCall(F, {Int, Ptr});
4850 Builder.CreateUnreachable();
4851
4852 // We do need to preserve an insertion point.
4853 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4854
4855 return RValue::get(nullptr);
4856 }
4857 case Builtin::BI__builtin_unwind_init: {
4858 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4859 Builder.CreateCall(F);
4860 return RValue::get(nullptr);
4861 }
4862 case Builtin::BI__builtin_extend_pointer: {
4863 // Extends a pointer to the size of an _Unwind_Word, which is
4864 // uint64_t on all platforms. Generally this gets poked into a
4865 // register and eventually used as an address, so if the
4866 // addressing registers are wider than pointers and the platform
4867 // doesn't implicitly ignore high-order bits when doing
4868 // addressing, we need to make sure we zext / sext based on
4869 // the platform's expectations.
4870 //
4871 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4872
4873 // Cast the pointer to intptr_t.
4874 Value *Ptr = EmitScalarExpr(E->getArg(0));
4875 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4876
4877 // If that's 64 bits, we're done.
4878 if (IntPtrTy->getBitWidth() == 64)
4879 return RValue::get(Result);
4880
4881 // Otherwise, ask the target hooks whether to sign- or zero-extend.
4882 if (getTargetHooks().extendPointerWithSExt())
4883 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4884 else
4885 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4886 }
4887 case Builtin::BI__builtin_setjmp: {
4888 // Buffer is a void**.
4889 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4890
4891 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4892 // On this target, the back end fills in the context buffer completely.
4893 // It doesn't really matter if the frontend stores to the buffer before
4894 // calling setjmp; the back-end is going to overwrite it anyway.
4895 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4896 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4897 }
4898
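// For the generic lowering, the buffer expected by llvm.eh.sjlj.setjmp is
// populated by hand: the frame pointer is stored in slot 0 and the stack
// pointer in slot 2 (see the stores below); the intrinsic's lowering fills
// in the remaining resume-address slot itself.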
4899 // Store the frame pointer to the setjmp buffer.
4900 Value *FrameAddr = Builder.CreateCall(
4901 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4902 ConstantInt::get(Int32Ty, 0));
4903 Builder.CreateStore(FrameAddr, Buf);
4904
4905 // Store the stack pointer to the setjmp buffer.
4906 Value *StackAddr = Builder.CreateStackSave();
4907 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4908
4909 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4910 Builder.CreateStore(StackAddr, StackSaveSlot);
4911
4912 // Call LLVM's EH setjmp, which is lightweight.
4913 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4914 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4915 }
4916 case Builtin::BI__builtin_longjmp: {
4917 Value *Buf = EmitScalarExpr(E->getArg(0));
4918
4919 // Call LLVM's EH longjmp, which is lightweight.
4920 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4921
4922 // longjmp doesn't return; mark this as unreachable.
4923 Builder.CreateUnreachable();
4924
4925 // We do need to preserve an insertion point.
4926 EmitBlock(createBasicBlock("longjmp.cont"));
4927
4928 return RValue::get(nullptr);
4929 }
4930 case Builtin::BI__builtin_launder: {
4931 const Expr *Arg = E->getArg(0);
4932 QualType ArgTy = Arg->getType()->getPointeeType();
4933 Value *Ptr = EmitScalarExpr(Arg);
4934 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4935 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4936
4937 return RValue::get(Ptr);
4938 }
4939 case Builtin::BI__sync_fetch_and_add:
4940 case Builtin::BI__sync_fetch_and_sub:
4941 case Builtin::BI__sync_fetch_and_or:
4942 case Builtin::BI__sync_fetch_and_and:
4943 case Builtin::BI__sync_fetch_and_xor:
4944 case Builtin::BI__sync_fetch_and_nand:
4945 case Builtin::BI__sync_add_and_fetch:
4946 case Builtin::BI__sync_sub_and_fetch:
4947 case Builtin::BI__sync_and_and_fetch:
4948 case Builtin::BI__sync_or_and_fetch:
4949 case Builtin::BI__sync_xor_and_fetch:
4950 case Builtin::BI__sync_nand_and_fetch:
4951 case Builtin::BI__sync_val_compare_and_swap:
4952 case Builtin::BI__sync_bool_compare_and_swap:
4953 case Builtin::BI__sync_lock_test_and_set:
4954 case Builtin::BI__sync_lock_release:
4955 case Builtin::BI__sync_swap:
4956 llvm_unreachable("Shouldn't make it through sema");
4957 case Builtin::BI__sync_fetch_and_add_1:
4958 case Builtin::BI__sync_fetch_and_add_2:
4959 case Builtin::BI__sync_fetch_and_add_4:
4960 case Builtin::BI__sync_fetch_and_add_8:
4961 case Builtin::BI__sync_fetch_and_add_16:
4962 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4963 case Builtin::BI__sync_fetch_and_sub_1:
4964 case Builtin::BI__sync_fetch_and_sub_2:
4965 case Builtin::BI__sync_fetch_and_sub_4:
4966 case Builtin::BI__sync_fetch_and_sub_8:
4967 case Builtin::BI__sync_fetch_and_sub_16:
4968 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4969 case Builtin::BI__sync_fetch_and_or_1:
4970 case Builtin::BI__sync_fetch_and_or_2:
4971 case Builtin::BI__sync_fetch_and_or_4:
4972 case Builtin::BI__sync_fetch_and_or_8:
4973 case Builtin::BI__sync_fetch_and_or_16:
4974 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4975 case Builtin::BI__sync_fetch_and_and_1:
4976 case Builtin::BI__sync_fetch_and_and_2:
4977 case Builtin::BI__sync_fetch_and_and_4:
4978 case Builtin::BI__sync_fetch_and_and_8:
4979 case Builtin::BI__sync_fetch_and_and_16:
4980 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4981 case Builtin::BI__sync_fetch_and_xor_1:
4982 case Builtin::BI__sync_fetch_and_xor_2:
4983 case Builtin::BI__sync_fetch_and_xor_4:
4984 case Builtin::BI__sync_fetch_and_xor_8:
4985 case Builtin::BI__sync_fetch_and_xor_16:
4986 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4987 case Builtin::BI__sync_fetch_and_nand_1:
4988 case Builtin::BI__sync_fetch_and_nand_2:
4989 case Builtin::BI__sync_fetch_and_nand_4:
4990 case Builtin::BI__sync_fetch_and_nand_8:
4991 case Builtin::BI__sync_fetch_and_nand_16:
4992 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4993
4994 // Clang extensions: not overloaded yet.
4995 case Builtin::BI__sync_fetch_and_min:
4996 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4997 case Builtin::BI__sync_fetch_and_max:
4998 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4999 case Builtin::BI__sync_fetch_and_umin:
5000 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
5001 case Builtin::BI__sync_fetch_and_umax:
5002 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
5003
5004 case Builtin::BI__sync_add_and_fetch_1:
5005 case Builtin::BI__sync_add_and_fetch_2:
5006 case Builtin::BI__sync_add_and_fetch_4:
5007 case Builtin::BI__sync_add_and_fetch_8:
5008 case Builtin::BI__sync_add_and_fetch_16:
5009 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
5010 llvm::Instruction::Add);
5011 case Builtin::BI__sync_sub_and_fetch_1:
5012 case Builtin::BI__sync_sub_and_fetch_2:
5013 case Builtin::BI__sync_sub_and_fetch_4:
5014 case Builtin::BI__sync_sub_and_fetch_8:
5015 case Builtin::BI__sync_sub_and_fetch_16:
5016 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
5017 llvm::Instruction::Sub);
5018 case Builtin::BI__sync_and_and_fetch_1:
5019 case Builtin::BI__sync_and_and_fetch_2:
5020 case Builtin::BI__sync_and_and_fetch_4:
5021 case Builtin::BI__sync_and_and_fetch_8:
5022 case Builtin::BI__sync_and_and_fetch_16:
5023 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
5024 llvm::Instruction::And);
5025 case Builtin::BI__sync_or_and_fetch_1:
5026 case Builtin::BI__sync_or_and_fetch_2:
5027 case Builtin::BI__sync_or_and_fetch_4:
5028 case Builtin::BI__sync_or_and_fetch_8:
5029 case Builtin::BI__sync_or_and_fetch_16:
5030 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5031 llvm::Instruction::Or);
5032 case Builtin::BI__sync_xor_and_fetch_1:
5033 case Builtin::BI__sync_xor_and_fetch_2:
5034 case Builtin::BI__sync_xor_and_fetch_4:
5035 case Builtin::BI__sync_xor_and_fetch_8:
5036 case Builtin::BI__sync_xor_and_fetch_16:
5037 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5038 llvm::Instruction::Xor);
5039 case Builtin::BI__sync_nand_and_fetch_1:
5040 case Builtin::BI__sync_nand_and_fetch_2:
5041 case Builtin::BI__sync_nand_and_fetch_4:
5042 case Builtin::BI__sync_nand_and_fetch_8:
5043 case Builtin::BI__sync_nand_and_fetch_16:
5044 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5045 llvm::Instruction::And, true);
5046
5047 case Builtin::BI__sync_val_compare_and_swap_1:
5048 case Builtin::BI__sync_val_compare_and_swap_2:
5049 case Builtin::BI__sync_val_compare_and_swap_4:
5050 case Builtin::BI__sync_val_compare_and_swap_8:
5051 case Builtin::BI__sync_val_compare_and_swap_16:
5052 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5053
5054 case Builtin::BI__sync_bool_compare_and_swap_1:
5055 case Builtin::BI__sync_bool_compare_and_swap_2:
5056 case Builtin::BI__sync_bool_compare_and_swap_4:
5057 case Builtin::BI__sync_bool_compare_and_swap_8:
5058 case Builtin::BI__sync_bool_compare_and_swap_16:
5059 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5060
5061 case Builtin::BI__sync_swap_1:
5062 case Builtin::BI__sync_swap_2:
5063 case Builtin::BI__sync_swap_4:
5064 case Builtin::BI__sync_swap_8:
5065 case Builtin::BI__sync_swap_16:
5066 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5067
5068 case Builtin::BI__sync_lock_test_and_set_1:
5069 case Builtin::BI__sync_lock_test_and_set_2:
5070 case Builtin::BI__sync_lock_test_and_set_4:
5071 case Builtin::BI__sync_lock_test_and_set_8:
5072 case Builtin::BI__sync_lock_test_and_set_16:
5073 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5074
5075 case Builtin::BI__sync_lock_release_1:
5076 case Builtin::BI__sync_lock_release_2:
5077 case Builtin::BI__sync_lock_release_4:
5078 case Builtin::BI__sync_lock_release_8:
5079 case Builtin::BI__sync_lock_release_16: {
5080 Address Ptr = CheckAtomicAlignment(*this, E);
5081 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5082
5083 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5084 getContext().getTypeSize(ElTy));
5085 llvm::StoreInst *Store =
5086 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5087 Store->setAtomic(llvm::AtomicOrdering::Release);
5088 return RValue::get(nullptr);
5089 }
5090
5091 case Builtin::BI__sync_synchronize: {
5092 // We assume this is supposed to correspond to a C++0x-style
5093 // sequentially-consistent fence (i.e. this is only usable for
5094 // synchronization, not device I/O or anything like that). This intrinsic
5095 // is really badly designed in the sense that in theory, there isn't
5096 // any way to safely use it... but in practice, it mostly works
5097 // to use it with non-atomic loads and stores to get acquire/release
5098 // semantics.
5099 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5100 return RValue::get(nullptr);
5101 }
5102
5103 case Builtin::BI__builtin_nontemporal_load:
5104 return RValue::get(EmitNontemporalLoad(*this, E));
5105 case Builtin::BI__builtin_nontemporal_store:
5106 return RValue::get(EmitNontemporalStore(*this, E));
5107 case Builtin::BI__c11_atomic_is_lock_free:
5108 case Builtin::BI__atomic_is_lock_free: {
5109 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5110 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5111 // _Atomic(T) is always properly-aligned.
5112 const char *LibCallName = "__atomic_is_lock_free";
5113 CallArgList Args;
5114 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5115 getContext().getSizeType());
5116 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5117 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5118 getContext().VoidPtrTy);
5119 else
5120 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5121 getContext().VoidPtrTy);
5122 const CGFunctionInfo &FuncInfo =
5123 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5124 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5125 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5126 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5127 ReturnValueSlot(), Args);
5128 }
5129
5130 case Builtin::BI__atomic_test_and_set: {
5131 // Look at the argument type to determine whether this is a volatile
5132 // operation. The parameter type is always volatile.
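// Regardless of the ordering, the operation itself is an i8 atomic xchg that
// stores 1 and returns whether the location previously held a nonzero value.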
5133 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5134 bool Volatile =
5135 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5136
5137 Address Ptr =
5138 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
5139
5140 Value *NewVal = Builder.getInt8(1);
5141 Value *Order = EmitScalarExpr(E->getArg(1));
5142 if (isa<llvm::ConstantInt>(Order)) {
5143 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5144 AtomicRMWInst *Result = nullptr;
5145 switch (ord) {
5146 case 0: // memory_order_relaxed
5147 default: // invalid order
5148 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5149 llvm::AtomicOrdering::Monotonic);
5150 break;
5151 case 1: // memory_order_consume
5152 case 2: // memory_order_acquire
5153 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5154 llvm::AtomicOrdering::Acquire);
5155 break;
5156 case 3: // memory_order_release
5157 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5158 llvm::AtomicOrdering::Release);
5159 break;
5160 case 4: // memory_order_acq_rel
5161
5162 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5163 llvm::AtomicOrdering::AcquireRelease);
5164 break;
5165 case 5: // memory_order_seq_cst
5166 Result = Builder.CreateAtomicRMW(
5167 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5168 llvm::AtomicOrdering::SequentiallyConsistent);
5169 break;
5170 }
5171 Result->setVolatile(Volatile);
5172 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5173 }
5174
5175 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5176
5177 llvm::BasicBlock *BBs[5] = {
5178 createBasicBlock("monotonic", CurFn),
5179 createBasicBlock("acquire", CurFn),
5180 createBasicBlock("release", CurFn),
5181 createBasicBlock("acqrel", CurFn),
5182 createBasicBlock("seqcst", CurFn)
5183 };
5184 llvm::AtomicOrdering Orders[5] = {
5185 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
5186 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
5187 llvm::AtomicOrdering::SequentiallyConsistent};
5188
5189 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5190 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5191
5192 Builder.SetInsertPoint(ContBB);
5193 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
5194
5195 for (unsigned i = 0; i < 5; ++i) {
5196 Builder.SetInsertPoint(BBs[i]);
5197 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
5198 Ptr, NewVal, Orders[i]);
5199 RMW->setVolatile(Volatile);
5200 Result->addIncoming(RMW, BBs[i]);
5201 Builder.CreateBr(ContBB);
5202 }
5203
5204 SI->addCase(Builder.getInt32(0), BBs[0]);
5205 SI->addCase(Builder.getInt32(1), BBs[1]);
5206 SI->addCase(Builder.getInt32(2), BBs[1]);
5207 SI->addCase(Builder.getInt32(3), BBs[2]);
5208 SI->addCase(Builder.getInt32(4), BBs[3]);
5209 SI->addCase(Builder.getInt32(5), BBs[4]);
5210
5211 Builder.SetInsertPoint(ContBB);
5212 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5213 }
5214
5215 case Builtin::BI__atomic_clear: {
5216 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5217 bool Volatile =
5218 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5219
5220 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
5221 Ptr = Ptr.withElementType(Int8Ty);
5222 Value *NewVal = Builder.getInt8(0);
5223 Value *Order = EmitScalarExpr(E->getArg(1));
5224 if (isa<llvm::ConstantInt>(Order)) {
5225 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5226 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5227 switch (ord) {
5228 case 0: // memory_order_relaxed
5229 default: // invalid order
5230 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
5231 break;
5232 case 3: // memory_order_release
5233 Store->setOrdering(llvm::AtomicOrdering::Release);
5234 break;
5235 case 5: // memory_order_seq_cst
5236 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
5237 break;
5238 }
5239 return RValue::get(nullptr);
5240 }
5241
5242 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5243
5244 llvm::BasicBlock *BBs[3] = {
5245 createBasicBlock("monotonic", CurFn),
5246 createBasicBlock("release", CurFn),
5247 createBasicBlock("seqcst", CurFn)
5248 };
5249 llvm::AtomicOrdering Orders[3] = {
5250 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
5251 llvm::AtomicOrdering::SequentiallyConsistent};
5252
5253 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5254 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5255
5256 for (unsigned i = 0; i < 3; ++i) {
5257 Builder.SetInsertPoint(BBs[i]);
5258 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5259 Store->setOrdering(Orders[i]);
5260 Builder.CreateBr(ContBB);
5261 }
5262
5263 SI->addCase(Builder.getInt32(0), BBs[0]);
5264 SI->addCase(Builder.getInt32(3), BBs[1]);
5265 SI->addCase(Builder.getInt32(5), BBs[2]);
5266
5267 Builder.SetInsertPoint(ContBB);
5268 return RValue::get(nullptr);
5269 }
5270
5271 case Builtin::BI__atomic_thread_fence:
5272 case Builtin::BI__atomic_signal_fence:
5273 case Builtin::BI__c11_atomic_thread_fence:
5274 case Builtin::BI__c11_atomic_signal_fence: {
5275 llvm::SyncScope::ID SSID;
5276 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5277 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5278 SSID = llvm::SyncScope::SingleThread;
5279 else
5280 SSID = llvm::SyncScope::System;
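// A signal fence only has to order the current thread against a signal
// handler running on that same thread, so it uses the single-thread sync
// scope (typically a compiler-only barrier); a thread fence uses the
// system-wide scope.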
5281 Value *Order = EmitScalarExpr(E->getArg(0));
5282 if (isa<llvm::ConstantInt>(Order)) {
5283 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5284 switch (ord) {
5285 case 0: // memory_order_relaxed
5286 default: // invalid order
5287 break;
5288 case 1: // memory_order_consume
5289 case 2: // memory_order_acquire
5290 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5291 break;
5292 case 3: // memory_order_release
5293 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5294 break;
5295 case 4: // memory_order_acq_rel
5296 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5297 break;
5298 case 5: // memory_order_seq_cst
5299 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5300 break;
5301 }
5302 return RValue::get(nullptr);
5303 }
5304
5305 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5306 AcquireBB = createBasicBlock("acquire", CurFn);
5307 ReleaseBB = createBasicBlock("release", CurFn);
5308 AcqRelBB = createBasicBlock("acqrel", CurFn);
5309 SeqCstBB = createBasicBlock("seqcst", CurFn);
5310 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5311
5312 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5313 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5314
5315 Builder.SetInsertPoint(AcquireBB);
5316 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5317 Builder.CreateBr(ContBB);
5318 SI->addCase(Builder.getInt32(1), AcquireBB);
5319 SI->addCase(Builder.getInt32(2), AcquireBB);
5320
5321 Builder.SetInsertPoint(ReleaseBB);
5322 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5323 Builder.CreateBr(ContBB);
5324 SI->addCase(Builder.getInt32(3), ReleaseBB);
5325
5326 Builder.SetInsertPoint(AcqRelBB);
5327 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5328 Builder.CreateBr(ContBB);
5329 SI->addCase(Builder.getInt32(4), AcqRelBB);
5330
5331 Builder.SetInsertPoint(SeqCstBB);
5332 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5333 Builder.CreateBr(ContBB);
5334 SI->addCase(Builder.getInt32(5), SeqCstBB);
5335
5336 Builder.SetInsertPoint(ContBB);
5337 return RValue::get(nullptr);
5338 }
5339 case Builtin::BI__scoped_atomic_thread_fence: {
5340 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5341
5342 Value *Order = EmitScalarExpr(E->getArg(0));
5343 Value *Scope = EmitScalarExpr(E->getArg(1));
5344 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5345 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5346 if (Ord && Scp) {
5347 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5348 ? ScopeModel->map(Scp->getZExtValue())
5349 : ScopeModel->map(ScopeModel->getFallBackValue());
5350 switch (Ord->getZExtValue()) {
5351 case 0: // memory_order_relaxed
5352 default: // invalid order
5353 break;
5354 case 1: // memory_order_consume
5355 case 2: // memory_order_acquire
5356 Builder.CreateFence(
5357 llvm::AtomicOrdering::Acquire,
5358 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5359 llvm::AtomicOrdering::Acquire,
5360 getLLVMContext()));
5361 break;
5362 case 3: // memory_order_release
5363 Builder.CreateFence(
5364 llvm::AtomicOrdering::Release,
5365 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5366 llvm::AtomicOrdering::Release,
5367 getLLVMContext()));
5368 break;
5369 case 4: // memory_order_acq_rel
5370 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5371 getTargetHooks().getLLVMSyncScopeID(
5372 getLangOpts(), SS,
5373 llvm::AtomicOrdering::AcquireRelease,
5374 getLLVMContext()));
5375 break;
5376 case 5: // memory_order_seq_cst
5377 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5378 getTargetHooks().getLLVMSyncScopeID(
5379 getLangOpts(), SS,
5380 llvm::AtomicOrdering::SequentiallyConsistent,
5381 getLLVMContext()));
5382 break;
5383 }
5384 return RValue::get(nullptr);
5385 }
5386
5387 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5388
5389 SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
5390 OrderBBs;
5391 if (Ord) {
5392 switch (Ord->getZExtValue()) {
5393 case 0: // memory_order_relaxed
5394 default: // invalid order
5395 ContBB->eraseFromParent();
5396 return RValue::get(nullptr);
5397 case 1: // memory_order_consume
5398 case 2: // memory_order_acquire
5399 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5400 llvm::AtomicOrdering::Acquire);
5401 break;
5402 case 3: // memory_order_release
5403 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5404 llvm::AtomicOrdering::Release);
5405 break;
5406 case 4: // memory_order_acq_rel
5407 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5408 llvm::AtomicOrdering::AcquireRelease);
5409 break;
5410 case 5: // memory_order_seq_cst
5411 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5412 llvm::AtomicOrdering::SequentiallyConsistent);
5413 break;
5414 }
5415 } else {
5416 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5417 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5418 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5419 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5420
5421 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5422 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5423 SI->addCase(Builder.getInt32(1), AcquireBB);
5424 SI->addCase(Builder.getInt32(2), AcquireBB);
5425 SI->addCase(Builder.getInt32(3), ReleaseBB);
5426 SI->addCase(Builder.getInt32(4), AcqRelBB);
5427 SI->addCase(Builder.getInt32(5), SeqCstBB);
5428
5429 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5430 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5431 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5432 OrderBBs.emplace_back(SeqCstBB,
5433 llvm::AtomicOrdering::SequentiallyConsistent);
5434 }
5435
5436 for (auto &[OrderBB, Ordering] : OrderBBs) {
5437 Builder.SetInsertPoint(OrderBB);
5438 if (Scp) {
5439 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5440 ? ScopeModel->map(Scp->getZExtValue())
5441 : ScopeModel->map(ScopeModel->getFallBackValue());
5442 Builder.CreateFence(Ordering,
5443 getTargetHooks().getLLVMSyncScopeID(
5444 getLangOpts(), SS, Ordering, getLLVMContext()));
5445 Builder.CreateBr(ContBB);
5446 } else {
5447 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5448 for (unsigned Scp : ScopeModel->getRuntimeValues())
5449 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5450
5451 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5452 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5453 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5454 auto *B = BBs[Scp];
5455 SI->addCase(Builder.getInt32(Scp), B);
5456
5457 Builder.SetInsertPoint(B);
5458 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5459 getLangOpts(), ScopeModel->map(Scp),
5460 Ordering, getLLVMContext()));
5461 Builder.CreateBr(ContBB);
5462 }
5463 }
5464 }
5465
5466 Builder.SetInsertPoint(ContBB);
5467 return RValue::get(nullptr);
5468 }
5469
5470 case Builtin::BI__builtin_signbit:
5471 case Builtin::BI__builtin_signbitf:
5472 case Builtin::BI__builtin_signbitl: {
5473 return RValue::get(
5474 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5475 ConvertType(E->getType())));
5476 }
5477 case Builtin::BI__warn_memset_zero_len:
5478 return RValue::getIgnored();
5479 case Builtin::BI__annotation: {
5480 // Re-encode each wide string to UTF8 and make an MDString.
5481 SmallVector<Metadata *, 1> Strings;
5482 for (const Expr *Arg : E->arguments()) {
5483 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5484 assert(Str->getCharByteWidth() == 2);
5485 StringRef WideBytes = Str->getBytes();
5486 std::string StrUtf8;
5487 if (!convertUTF16ToUTF8String(
5488 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5489 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5490 continue;
5491 }
5492 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5493 }
5494
5495 // Build an MDTuple of MDStrings and emit the intrinsic call.
5496 llvm::Function *F =
5497 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5498 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5499 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5500 return RValue::getIgnored();
5501 }
5502 case Builtin::BI__builtin_annotation: {
5503 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5504 llvm::Function *F =
5505 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5506 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5507
5508 // Get the annotation string, go through casts. Sema requires this to be a
5509 // non-wide string literal, potentially cast, so the cast<> is safe.
5510 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5511 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5512 return RValue::get(
5513 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5514 }
5515 case Builtin::BI__builtin_addcb:
5516 case Builtin::BI__builtin_addcs:
5517 case Builtin::BI__builtin_addc:
5518 case Builtin::BI__builtin_addcl:
5519 case Builtin::BI__builtin_addcll:
5520 case Builtin::BI__builtin_subcb:
5521 case Builtin::BI__builtin_subcs:
5522 case Builtin::BI__builtin_subc:
5523 case Builtin::BI__builtin_subcl:
5524 case Builtin::BI__builtin_subcll: {
5525
5526 // We translate all of these builtins from expressions of the form:
5527 // int x = ..., y = ..., carryin = ..., carryout, result;
5528 // result = __builtin_addc(x, y, carryin, &carryout);
5529 //
5530 // to LLVM IR of the form:
5531 //
5532 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5533 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5534 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5535 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5536 // i32 %carryin)
5537 // %result = extractvalue {i32, i1} %tmp2, 0
5538 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5539 // %tmp3 = or i1 %carry1, %carry2
5540 // %tmp4 = zext i1 %tmp3 to i32
5541 // store i32 %tmp4, i32* %carryout
5542
5543 // Scalarize our inputs.
5544 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5545 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5546 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5547 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5548
5549 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5550 llvm::Intrinsic::ID IntrinsicId;
5551 switch (BuiltinID) {
5552 default: llvm_unreachable("Unknown multiprecision builtin id.");
5553 case Builtin::BI__builtin_addcb:
5554 case Builtin::BI__builtin_addcs:
5555 case Builtin::BI__builtin_addc:
5556 case Builtin::BI__builtin_addcl:
5557 case Builtin::BI__builtin_addcll:
5558 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5559 break;
5560 case Builtin::BI__builtin_subcb:
5561 case Builtin::BI__builtin_subcs:
5562 case Builtin::BI__builtin_subc:
5563 case Builtin::BI__builtin_subcl:
5564 case Builtin::BI__builtin_subcll:
5565 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5566 break;
5567 }
5568
5569 // Construct our resulting LLVM IR expression.
5570 llvm::Value *Carry1;
5571 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5572 X, Y, Carry1);
5573 llvm::Value *Carry2;
5574 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5575 Sum1, Carryin, Carry2);
5576 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5577 X->getType());
5578 Builder.CreateStore(CarryOut, CarryOutPtr);
5579 return RValue::get(Sum2);
5580 }
5581
5582 case Builtin::BI__builtin_add_overflow:
5583 case Builtin::BI__builtin_sub_overflow:
5584 case Builtin::BI__builtin_mul_overflow: {
5585 const clang::Expr *LeftArg = E->getArg(0);
5586 const clang::Expr *RightArg = E->getArg(1);
5587 const clang::Expr *ResultArg = E->getArg(2);
5588
5589 clang::QualType ResultQTy =
5590 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5591
5592 WidthAndSignedness LeftInfo =
5593 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5594 WidthAndSignedness RightInfo =
5595 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5596 WidthAndSignedness ResultInfo =
5597 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5598
5599 // Handle mixed-sign multiplication as a special case, because adding
5600 // runtime or backend support for our generic irgen would be too expensive.
5601 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5602 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5603 RightInfo, ResultArg, ResultQTy,
5604 ResultInfo);
5605
5606 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5607 ResultInfo))
5608 return EmitCheckedUnsignedMultiplySignedResult(
5609 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5610 ResultInfo);
5611
5612 WidthAndSignedness EncompassingInfo =
5613 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
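// For example, __builtin_add_overflow on two 'unsigned long long' operands
// with an 'int' result pointer yields a signed 65-bit encompassing type: the
// addition is performed at that width, and the truncation check further down
// catches values that do not fit in the 32-bit destination.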
5614
5615 llvm::Type *EncompassingLLVMTy =
5616 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5617
5618 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5619
5620 llvm::Intrinsic::ID IntrinsicId;
5621 switch (BuiltinID) {
5622 default:
5623 llvm_unreachable("Unknown overflow builtin id.");
5624 case Builtin::BI__builtin_add_overflow:
5625 IntrinsicId = EncompassingInfo.Signed
5626 ? llvm::Intrinsic::sadd_with_overflow
5627 : llvm::Intrinsic::uadd_with_overflow;
5628 break;
5629 case Builtin::BI__builtin_sub_overflow:
5630 IntrinsicId = EncompassingInfo.Signed
5631 ? llvm::Intrinsic::ssub_with_overflow
5632 : llvm::Intrinsic::usub_with_overflow;
5633 break;
5634 case Builtin::BI__builtin_mul_overflow:
5635 IntrinsicId = EncompassingInfo.Signed
5636 ? llvm::Intrinsic::smul_with_overflow
5637 : llvm::Intrinsic::umul_with_overflow;
5638 break;
5639 }
5640
5641 llvm::Value *Left = EmitScalarExpr(LeftArg);
5642 llvm::Value *Right = EmitScalarExpr(RightArg);
5643 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5644
5645 // Extend each operand to the encompassing type.
5646 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5647 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5648
5649 // Perform the operation on the extended values.
5650 llvm::Value *Overflow, *Result;
5651 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5652
5653 if (EncompassingInfo.Width > ResultInfo.Width) {
5654 // The encompassing type is wider than the result type, so we need to
5655 // truncate it.
5656 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5657
5658 // To see if the truncation caused an overflow, we will extend
5659 // the result and then compare it to the original result.
5660 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5661 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5662 llvm::Value *TruncationOverflow =
5663 Builder.CreateICmpNE(Result, ResultTruncExt);
5664
5665 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5666 Result = ResultTrunc;
5667 }
5668
5669 // Finally, store the result using the pointer.
5670 bool isVolatile =
5671 ResultArg->getType()->getPointeeType().isVolatileQualified();
5672 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5673
5674 return RValue::get(Overflow);
5675 }
5676
5677 case Builtin::BI__builtin_uadd_overflow:
5678 case Builtin::BI__builtin_uaddl_overflow:
5679 case Builtin::BI__builtin_uaddll_overflow:
5680 case Builtin::BI__builtin_usub_overflow:
5681 case Builtin::BI__builtin_usubl_overflow:
5682 case Builtin::BI__builtin_usubll_overflow:
5683 case Builtin::BI__builtin_umul_overflow:
5684 case Builtin::BI__builtin_umull_overflow:
5685 case Builtin::BI__builtin_umulll_overflow:
5686 case Builtin::BI__builtin_sadd_overflow:
5687 case Builtin::BI__builtin_saddl_overflow:
5688 case Builtin::BI__builtin_saddll_overflow:
5689 case Builtin::BI__builtin_ssub_overflow:
5690 case Builtin::BI__builtin_ssubl_overflow:
5691 case Builtin::BI__builtin_ssubll_overflow:
5692 case Builtin::BI__builtin_smul_overflow:
5693 case Builtin::BI__builtin_smull_overflow:
5694 case Builtin::BI__builtin_smulll_overflow: {
5695
5696 // We translate all of these builtins directly to the relevant llvm IR node.
5697
5698 // Scalarize our inputs.
5699 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5700 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5701 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5702
5703 // Decide which of the overflow intrinsics we are lowering to:
5704 llvm::Intrinsic::ID IntrinsicId;
5705 switch (BuiltinID) {
5706 default: llvm_unreachable("Unknown overflow builtin id.");
5707 case Builtin::BI__builtin_uadd_overflow:
5708 case Builtin::BI__builtin_uaddl_overflow:
5709 case Builtin::BI__builtin_uaddll_overflow:
5710 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5711 break;
5712 case Builtin::BI__builtin_usub_overflow:
5713 case Builtin::BI__builtin_usubl_overflow:
5714 case Builtin::BI__builtin_usubll_overflow:
5715 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5716 break;
5717 case Builtin::BI__builtin_umul_overflow:
5718 case Builtin::BI__builtin_umull_overflow:
5719 case Builtin::BI__builtin_umulll_overflow:
5720 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5721 break;
5722 case Builtin::BI__builtin_sadd_overflow:
5723 case Builtin::BI__builtin_saddl_overflow:
5724 case Builtin::BI__builtin_saddll_overflow:
5725 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5726 break;
5727 case Builtin::BI__builtin_ssub_overflow:
5728 case Builtin::BI__builtin_ssubl_overflow:
5729 case Builtin::BI__builtin_ssubll_overflow:
5730 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5731 break;
5732 case Builtin::BI__builtin_smul_overflow:
5733 case Builtin::BI__builtin_smull_overflow:
5734 case Builtin::BI__builtin_smulll_overflow:
5735 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5736 break;
5737 }
5738
5739
5740 llvm::Value *Carry;
5741 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5742 Builder.CreateStore(Sum, SumOutPtr);
5743
5744 return RValue::get(Carry);
5745 }
5746 case Builtin::BIaddressof:
5747 case Builtin::BI__addressof:
5748 case Builtin::BI__builtin_addressof:
5749 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5750 case Builtin::BI__builtin_function_start:
5753 case Builtin::BI__builtin_operator_new:
5754 return EmitBuiltinNewDeleteCall(
5755 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5756 case Builtin::BI__builtin_operator_delete:
5757 EmitBuiltinNewDeleteCall(
5758 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5759 return RValue::get(nullptr);
5760
5761 case Builtin::BI__builtin_is_aligned:
5762 return EmitBuiltinIsAligned(E);
5763 case Builtin::BI__builtin_align_up:
5764 return EmitBuiltinAlignTo(E, true);
5765 case Builtin::BI__builtin_align_down:
5766 return EmitBuiltinAlignTo(E, false);
5767
5768 case Builtin::BI__noop:
5769 // __noop always evaluates to an integer literal zero.
5770 return RValue::get(ConstantInt::get(IntTy, 0));
5771 case Builtin::BI__builtin_call_with_static_chain: {
5772 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5773 const Expr *Chain = E->getArg(1);
5774 return EmitCall(Call->getCallee()->getType(),
5775 EmitCallee(Call->getCallee()), Call, ReturnValue,
5776 EmitScalarExpr(Chain));
5777 }
5778 case Builtin::BI_InterlockedExchange8:
5779 case Builtin::BI_InterlockedExchange16:
5780 case Builtin::BI_InterlockedExchange:
5781 case Builtin::BI_InterlockedExchangePointer:
5782 return RValue::get(
5783 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5784 case Builtin::BI_InterlockedCompareExchangePointer:
5785 return RValue::get(
5786 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5787 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5788 return RValue::get(
5789 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5790 case Builtin::BI_InterlockedCompareExchange8:
5791 case Builtin::BI_InterlockedCompareExchange16:
5792 case Builtin::BI_InterlockedCompareExchange:
5793 case Builtin::BI_InterlockedCompareExchange64:
5795 case Builtin::BI_InterlockedIncrement16:
5796 case Builtin::BI_InterlockedIncrement:
5797 return RValue::get(
5798 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5799 case Builtin::BI_InterlockedDecrement16:
5800 case Builtin::BI_InterlockedDecrement:
5801 return RValue::get(
5802 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5803 case Builtin::BI_InterlockedAnd8:
5804 case Builtin::BI_InterlockedAnd16:
5805 case Builtin::BI_InterlockedAnd:
5806 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5807 case Builtin::BI_InterlockedExchangeAdd8:
5808 case Builtin::BI_InterlockedExchangeAdd16:
5809 case Builtin::BI_InterlockedExchangeAdd:
5810 return RValue::get(
5811 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5812 case Builtin::BI_InterlockedExchangeSub8:
5813 case Builtin::BI_InterlockedExchangeSub16:
5814 case Builtin::BI_InterlockedExchangeSub:
5815 return RValue::get(
5816 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5817 case Builtin::BI_InterlockedOr8:
5818 case Builtin::BI_InterlockedOr16:
5819 case Builtin::BI_InterlockedOr:
5820 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5821 case Builtin::BI_InterlockedXor8:
5822 case Builtin::BI_InterlockedXor16:
5823 case Builtin::BI_InterlockedXor:
5824 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5825
5826 case Builtin::BI_bittest64:
5827 case Builtin::BI_bittest:
5828 case Builtin::BI_bittestandcomplement64:
5829 case Builtin::BI_bittestandcomplement:
5830 case Builtin::BI_bittestandreset64:
5831 case Builtin::BI_bittestandreset:
5832 case Builtin::BI_bittestandset64:
5833 case Builtin::BI_bittestandset:
5834 case Builtin::BI_interlockedbittestandreset:
5835 case Builtin::BI_interlockedbittestandreset64:
5836 case Builtin::BI_interlockedbittestandset64:
5837 case Builtin::BI_interlockedbittestandset:
5838 case Builtin::BI_interlockedbittestandset_acq:
5839 case Builtin::BI_interlockedbittestandset_rel:
5840 case Builtin::BI_interlockedbittestandset_nf:
5841 case Builtin::BI_interlockedbittestandreset_acq:
5842 case Builtin::BI_interlockedbittestandreset_rel:
5843 case Builtin::BI_interlockedbittestandreset_nf:
5844 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5845
5846 // These builtins exist to emit regular volatile loads and stores not
5847 // affected by the -fms-volatile setting.
5848 case Builtin::BI__iso_volatile_load8:
5849 case Builtin::BI__iso_volatile_load16:
5850 case Builtin::BI__iso_volatile_load32:
5851 case Builtin::BI__iso_volatile_load64:
5852 return RValue::get(EmitISOVolatileLoad(*this, E));
5853 case Builtin::BI__iso_volatile_store8:
5854 case Builtin::BI__iso_volatile_store16:
5855 case Builtin::BI__iso_volatile_store32:
5856 case Builtin::BI__iso_volatile_store64:
5857 return RValue::get(EmitISOVolatileStore(*this, E));
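       // For example, __iso_volatile_load32(p) lowers to a plain
       // "load volatile i32" even under -fms-volatile, which would otherwise
       // give ordinary volatile accesses acquire/release (atomic) semantics.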
5858
5859 case Builtin::BI__builtin_ptrauth_sign_constant:
5860 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5861
5862 case Builtin::BI__builtin_ptrauth_auth:
5863 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5864 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5865 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5866 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5867 case Builtin::BI__builtin_ptrauth_strip: {
5868     // Emit the arguments.
5869     SmallVector<llvm::Value *, 5> Args;
5870     for (auto argExpr : E->arguments())
5871 Args.push_back(EmitScalarExpr(argExpr));
5872
5873 // Cast the value to intptr_t, saving its original type.
5874 llvm::Type *OrigValueType = Args[0]->getType();
5875 if (OrigValueType->isPointerTy())
5876 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5877
5878 switch (BuiltinID) {
5879 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5880 if (Args[4]->getType()->isPointerTy())
5881 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5882 [[fallthrough]];
5883
5884 case Builtin::BI__builtin_ptrauth_auth:
5885 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5886 if (Args[2]->getType()->isPointerTy())
5887 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5888 break;
5889
5890 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5891 if (Args[1]->getType()->isPointerTy())
5892 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5893 break;
5894
5895 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5896 case Builtin::BI__builtin_ptrauth_strip:
5897 break;
5898 }
5899
5900 // Call the intrinsic.
5901 auto IntrinsicID = [&]() -> unsigned {
5902 switch (BuiltinID) {
5903 case Builtin::BI__builtin_ptrauth_auth:
5904 return llvm::Intrinsic::ptrauth_auth;
5905 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5906 return llvm::Intrinsic::ptrauth_resign;
5907 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5908 return llvm::Intrinsic::ptrauth_blend;
5909 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5910 return llvm::Intrinsic::ptrauth_sign_generic;
5911 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5912 return llvm::Intrinsic::ptrauth_sign;
5913 case Builtin::BI__builtin_ptrauth_strip:
5914 return llvm::Intrinsic::ptrauth_strip;
5915 }
5916 llvm_unreachable("bad ptrauth intrinsic");
5917 }();
5918 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5919 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5920
5921 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5922 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5923 OrigValueType->isPointerTy()) {
5924 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5925 }
5926 return RValue::get(Result);
5927 }
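   // Sketch of the lowering above: __builtin_ptrauth_sign_unauthenticated(p, k, d)
   // becomes roughly
   //   %i = ptrtoint ptr %p to i64
   //   %s = call i64 @llvm.ptrauth.sign(i64 %i, i32 k, i64 d)
   //   %r = inttoptr i64 %s to ptr
   // i.e. pointer operands travel through intptr_t and the result is cast back,
   // except for blend_discriminator and sign_generic_data, whose results stay
   // integers.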
5928
5929   case Builtin::BI__exception_code:
5930   case Builtin::BI_exception_code:
5931     return RValue::get(EmitSEHExceptionCode());
5932   case Builtin::BI__exception_info:
5933   case Builtin::BI_exception_info:
5934     return RValue::get(EmitSEHExceptionInfo());
5935   case Builtin::BI__abnormal_termination:
5936   case Builtin::BI_abnormal_termination:
5937     return RValue::get(EmitSEHAbnormalTermination());
5938 case Builtin::BI_setjmpex:
5939 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5940 E->getArg(0)->getType()->isPointerType())
5941 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5942 break;
5943 case Builtin::BI_setjmp:
5944 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5945 E->getArg(0)->getType()->isPointerType()) {
5946 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5947 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5948 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5949 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5950 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5951 }
5952 break;
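   // Note (a sketch of what EmitMSVCRTSetJmp does): the _setjmp3 form passes an
   // extra trailing-argument count, while _setjmp and _setjmpex pass a
   // frame/stack-entry pointer (llvm.frameaddress, or llvm.sponentry on
   // AArch64), which is why the x86 and AArch64 triples pick different runtime
   // entry points here.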
5953
5954 // C++ std:: builtins.
5955 case Builtin::BImove:
5956 case Builtin::BImove_if_noexcept:
5957 case Builtin::BIforward:
5958 case Builtin::BIforward_like:
5959 case Builtin::BIas_const:
5960 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
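   // In other words, calls to std::move, std::forward, std::forward_like and
   // std::as_const never materialize a real call here; they reduce to the
   // address of the argument lvalue, so the "cast" is purely a type-system
   // operation.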
5961 case Builtin::BI__GetExceptionInfo: {
5962     if (llvm::GlobalVariable *GV =
5963             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5964       return RValue::get(GV);
5965 break;
5966 }
5967
5968 case Builtin::BI__fastfail:
5969 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5970
5971 case Builtin::BI__builtin_coro_id:
5972 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5973 case Builtin::BI__builtin_coro_promise:
5974 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5975 case Builtin::BI__builtin_coro_resume:
5976 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5977 return RValue::get(nullptr);
5978 case Builtin::BI__builtin_coro_frame:
5979 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5980 case Builtin::BI__builtin_coro_noop:
5981 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5982 case Builtin::BI__builtin_coro_free:
5983 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5984 case Builtin::BI__builtin_coro_destroy:
5985 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5986 return RValue::get(nullptr);
5987 case Builtin::BI__builtin_coro_done:
5988 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5989 case Builtin::BI__builtin_coro_alloc:
5990 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5991 case Builtin::BI__builtin_coro_begin:
5992 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5993 case Builtin::BI__builtin_coro_end:
5994 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5995 case Builtin::BI__builtin_coro_suspend:
5996 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5997 case Builtin::BI__builtin_coro_size:
5998 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5999 case Builtin::BI__builtin_coro_align:
6000 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
6001
6002 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
6003 case Builtin::BIread_pipe:
6004 case Builtin::BIwrite_pipe: {
6005 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6006 *Arg1 = EmitScalarExpr(E->getArg(1));
6007 CGOpenCLRuntime OpenCLRT(CGM);
6008 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6009 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6010
6011 // Type of the generic packet parameter.
6012     unsigned GenericAS =
6013         getContext().getTargetAddressSpace(LangAS::opencl_generic);
6014     llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
6015
6016 // Testing which overloaded version we should generate the call for.
6017 if (2U == E->getNumArgs()) {
6018 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
6019 : "__write_pipe_2";
6020 // Creating a generic function type to be able to call with any builtin or
6021 // user defined type.
6022 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
6023 llvm::FunctionType *FTy = llvm::FunctionType::get(
6024 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6025 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
6026       return RValue::get(
6027           EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6028                           {Arg0, ACast, PacketSize, PacketAlign}));
6029 } else {
6030 assert(4 == E->getNumArgs() &&
6031 "Illegal number of parameters to pipe function");
6032 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
6033 : "__write_pipe_4";
6034
6035 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
6036 Int32Ty, Int32Ty};
6037 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
6038 *Arg3 = EmitScalarExpr(E->getArg(3));
6039 llvm::FunctionType *FTy = llvm::FunctionType::get(
6040 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6041 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
6042 // We know the third argument is an integer type, but we may need to cast
6043 // it to i32.
6044 if (Arg2->getType() != Int32Ty)
6045 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
6046       return RValue::get(
6047           EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6048                           {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
6049 }
6050 }
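   // Example (a sketch): a two-argument read_pipe(p, &x) lowers to
   //   i32 __read_pipe_2(p, (generic i8*)&x, packet_size, packet_align)
   // with &x addrspacecast to the generic AS, while the four-argument form also
   // forwards the reserve_id and packet index to __read_pipe_4/__write_pipe_4.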
6051   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
6052 // functions
6053 case Builtin::BIreserve_read_pipe:
6054 case Builtin::BIreserve_write_pipe:
6055 case Builtin::BIwork_group_reserve_read_pipe:
6056 case Builtin::BIwork_group_reserve_write_pipe:
6057 case Builtin::BIsub_group_reserve_read_pipe:
6058 case Builtin::BIsub_group_reserve_write_pipe: {
6059 // Composing the mangled name for the function.
6060 const char *Name;
6061 if (BuiltinID == Builtin::BIreserve_read_pipe)
6062 Name = "__reserve_read_pipe";
6063 else if (BuiltinID == Builtin::BIreserve_write_pipe)
6064 Name = "__reserve_write_pipe";
6065 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
6066 Name = "__work_group_reserve_read_pipe";
6067 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
6068 Name = "__work_group_reserve_write_pipe";
6069 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
6070 Name = "__sub_group_reserve_read_pipe";
6071 else
6072 Name = "__sub_group_reserve_write_pipe";
6073
6074 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6075 *Arg1 = EmitScalarExpr(E->getArg(1));
6076 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
6077 CGOpenCLRuntime OpenCLRT(CGM);
6078 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6079 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6080
6081 // Building the generic function prototype.
6082 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
6083 llvm::FunctionType *FTy = llvm::FunctionType::get(
6084 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6085 // We know the second argument is an integer type, but we may need to cast
6086 // it to i32.
6087 if (Arg1->getType() != Int32Ty)
6088       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
6089     return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6090                                        {Arg0, Arg1, PacketSize, PacketAlign}));
6091 }
6092 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
6093 // functions
6094 case Builtin::BIcommit_read_pipe:
6095 case Builtin::BIcommit_write_pipe:
6096 case Builtin::BIwork_group_commit_read_pipe:
6097 case Builtin::BIwork_group_commit_write_pipe:
6098 case Builtin::BIsub_group_commit_read_pipe:
6099 case Builtin::BIsub_group_commit_write_pipe: {
6100 const char *Name;
6101 if (BuiltinID == Builtin::BIcommit_read_pipe)
6102 Name = "__commit_read_pipe";
6103 else if (BuiltinID == Builtin::BIcommit_write_pipe)
6104 Name = "__commit_write_pipe";
6105 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
6106 Name = "__work_group_commit_read_pipe";
6107 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
6108 Name = "__work_group_commit_write_pipe";
6109 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
6110 Name = "__sub_group_commit_read_pipe";
6111 else
6112 Name = "__sub_group_commit_write_pipe";
6113
6114 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6115 *Arg1 = EmitScalarExpr(E->getArg(1));
6116 CGOpenCLRuntime OpenCLRT(CGM);
6117 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6118 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6119
6120 // Building the generic function prototype.
6121 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
6122 llvm::FunctionType *FTy =
6123 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
6124 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6125
6126     return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6127                                        {Arg0, Arg1, PacketSize, PacketAlign}));
6128 }
6129 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
6130 case Builtin::BIget_pipe_num_packets:
6131 case Builtin::BIget_pipe_max_packets: {
6132 const char *BaseName;
6133 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
6134 if (BuiltinID == Builtin::BIget_pipe_num_packets)
6135 BaseName = "__get_pipe_num_packets";
6136 else
6137 BaseName = "__get_pipe_max_packets";
6138 std::string Name = std::string(BaseName) +
6139 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6140
6141 // Building the generic function prototype.
6142 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6143 CGOpenCLRuntime OpenCLRT(CGM);
6144 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6145 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6146 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6147 llvm::FunctionType *FTy = llvm::FunctionType::get(
6148 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6149
6150     return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6151                                        {Arg0, PacketSize, PacketAlign}));
6152 }
6153
6154 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6155 case Builtin::BIto_global:
6156 case Builtin::BIto_local:
6157 case Builtin::BIto_private: {
6158 auto Arg0 = EmitScalarExpr(E->getArg(0));
6159     auto NewArgT = llvm::PointerType::get(
6160         getLLVMContext(),
6161         CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6162     auto NewRetT = llvm::PointerType::get(
6163         getLLVMContext(),
6164         CGM.getContext().getTargetAddressSpace(
6165             E->getType()->getPointeeType().getAddressSpace()));
6166     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6167 llvm::Value *NewArg;
6168 if (Arg0->getType()->getPointerAddressSpace() !=
6169 NewArgT->getPointerAddressSpace())
6170 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6171 else
6172 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6173 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6174 auto NewCall =
6175 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6176 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6177 ConvertType(E->getType())));
6178 }
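   // Example (a sketch): to_global(p) becomes a call to __to_global with p cast
   // to the generic address space, and the returned pointer is cast back to the
   // source-level result type; to_local and to_private follow the same
   // "__" + callee-name pattern.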
6179
6180 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6181 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6182 // The code below expands the builtin call to a call to one of the following
6183 // functions that an OpenCL runtime library will have to provide:
6184 // __enqueue_kernel_basic
6185 // __enqueue_kernel_varargs
6186 // __enqueue_kernel_basic_events
6187 // __enqueue_kernel_events_varargs
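   // For example (a sketch): enqueue_kernel(q, flags, ndr, block) with no event
   // list and no local-size arguments maps to
   //   i32 __enqueue_kernel_basic(queue_t, i32, ndrange_t*,
   //                              generic void* invoke, generic void* block)
   // where the last two parameters are the block's kernel handle and the block
   // literal; the *_varargs forms append a count plus an array of local sizes,
   // and the *_events forms add the clk_event_t wait list and return event.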
6188 case Builtin::BIenqueue_kernel: {
6189 StringRef Name; // Generated function call name
6190 unsigned NumArgs = E->getNumArgs();
6191
6192 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6193 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6194 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6195
6196 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6197 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6198 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6199 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6200 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6201
6202 if (NumArgs == 4) {
6203 // The most basic form of the call with parameters:
6204 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6205 Name = "__enqueue_kernel_basic";
6206 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6207 GenericVoidPtrTy};
6208 llvm::FunctionType *FTy = llvm::FunctionType::get(
6209 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6210
6211 auto Info =
6212 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6213 llvm::Value *Kernel =
6214 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6215 llvm::Value *Block =
6216 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6217
6218 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6219 {Queue, Flags, Range, Kernel, Block});
6220 return RValue::get(RTCall);
6221 }
6222 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6223
6224 // Create a temporary array to hold the sizes of local pointer arguments
6225 // for the block. \p First is the position of the first size argument.
6226 auto CreateArrayForSizeVar = [=](unsigned First)
6227 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6228       llvm::APInt ArraySize(32, NumArgs - First);
6229       QualType SizeArrayTy = getContext().getConstantArrayType(
6230           getContext().getSizeType(), ArraySize, nullptr,
6231           ArraySizeModifier::Normal,
6232           /*IndexTypeQuals=*/0);
6233 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6234 llvm::Value *TmpPtr = Tmp.getPointer();
6235       // The EmitLifetime* pair expects a naked Alloca as its last argument;
6236       // however, when the default AS is not the Alloca AS, Tmp is actually
6237       // the Alloca addrspacecast to the default AS, hence the
6238       // stripPointerCasts().
6239 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6240 llvm::Value *TmpSize = EmitLifetimeStart(
6241 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6242 llvm::Value *ElemPtr;
6243 // Each of the following arguments specifies the size of the corresponding
6244 // argument passed to the enqueued block.
6245 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6246 for (unsigned I = First; I < NumArgs; ++I) {
6247 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6248 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6249 {Zero, Index});
6250 if (I == First)
6251 ElemPtr = GEP;
6252 auto *V =
6253           Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6254       Builder.CreateAlignedStore(
6255           V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6256 }
6257 // Return the Alloca itself rather than a potential ascast as this is only
6258 // used by the paired EmitLifetimeEnd.
6259 return std::tie(ElemPtr, TmpSize, Alloca);
6260 };
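     // E.g. (a sketch): for enqueue_kernel(q, flags, ndr, block, 16u, 32u) the
     // lambda above materializes a [2 x size_t] "block_sizes" temporary holding
     // {16, 32} and hands back a pointer to its first element, which is what the
     // *_varargs runtime entry points receive together with the count 2.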
6261
6262 // Could have events and/or varargs.
6263 if (E->getArg(3)->getType()->isBlockPointerType()) {
6264 // No events passed, but has variadic arguments.
6265 Name = "__enqueue_kernel_varargs";
6266 auto Info =
6267 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6268 llvm::Value *Kernel =
6269 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6270 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6271 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6272 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6273
6274 // Create a vector of the arguments, as well as a constant value to
6275 // express to the runtime the number of variadic arguments.
6276 llvm::Value *const Args[] = {Queue, Flags,
6277 Range, Kernel,
6278 Block, ConstantInt::get(IntTy, NumArgs - 4),
6279 ElemPtr};
6280 llvm::Type *const ArgTys[] = {
6281 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6282 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6283
6284 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6285 auto Call = RValue::get(
6286 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6287 if (TmpSize)
6288 EmitLifetimeEnd(TmpSize, TmpPtr);
6289 return Call;
6290 }
6291 // Any calls now have event arguments passed.
6292 if (NumArgs >= 7) {
6293       llvm::PointerType *PtrTy = llvm::PointerType::get(
6294           CGM.getLLVMContext(),
6295           CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6296
6297 llvm::Value *NumEvents =
6298 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6299
6300       // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth
6301       // arguments to be a null pointer constant (including a `0` literal),
6302       // we take that into account and emit a null pointer directly.
6303 llvm::Value *EventWaitList = nullptr;
6304       if (E->getArg(4)->isNullPointerConstant(
6305               getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6306         EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6307 } else {
6308 EventWaitList =
6309 E->getArg(4)->getType()->isArrayType()
6310 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6311 : EmitScalarExpr(E->getArg(4));
6312 // Convert to generic address space.
6313 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6314 }
6315 llvm::Value *EventRet = nullptr;
6316       if (E->getArg(5)->isNullPointerConstant(
6317               getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6318         EventRet = llvm::ConstantPointerNull::get(PtrTy);
6319 } else {
6320 EventRet =
6321 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6322 }
6323
6324 auto Info =
6325 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6326 llvm::Value *Kernel =
6327 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6328 llvm::Value *Block =
6329 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6330
6331 std::vector<llvm::Type *> ArgTys = {
6332 QueueTy, Int32Ty, RangeTy, Int32Ty,
6333 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6334
6335 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6336 NumEvents, EventWaitList, EventRet,
6337 Kernel, Block};
6338
6339 if (NumArgs == 7) {
6340 // Has events but no variadics.
6341 Name = "__enqueue_kernel_basic_events";
6342 llvm::FunctionType *FTy = llvm::FunctionType::get(
6343 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6344         return RValue::get(
6345             EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6346                             llvm::ArrayRef<llvm::Value *>(Args)));
6347       }
6348 // Has event info and variadics
6349 // Pass the number of variadics to the runtime function too.
6350 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6351 ArgTys.push_back(Int32Ty);
6352 Name = "__enqueue_kernel_events_varargs";
6353
6354 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6355 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6356 Args.push_back(ElemPtr);
6357 ArgTys.push_back(ElemPtr->getType());
6358
6359 llvm::FunctionType *FTy = llvm::FunctionType::get(
6360 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6361       auto Call =
6362           RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6363                                       llvm::ArrayRef<llvm::Value *>(Args)));
6364       if (TmpSize)
6365 EmitLifetimeEnd(TmpSize, TmpPtr);
6366 return Call;
6367 }
6368 llvm_unreachable("Unexpected enqueue_kernel signature");
6369 }
6370 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6371 // parameter.
6372 case Builtin::BIget_kernel_work_group_size: {
6373 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6374 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6375 auto Info =
6376 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6377 Value *Kernel =
6378 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6379     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6380     return RValue::get(EmitRuntimeCall(
6381         CGM.CreateRuntimeFunction(
6382             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6383 false),
6384 "__get_kernel_work_group_size_impl"),
6385 {Kernel, Arg}));
6386 }
6387 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6388 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6389 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6390 auto Info =
6391 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6392 Value *Kernel =
6393 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6394     Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6395     return RValue::get(EmitRuntimeCall(
6396         CGM.CreateRuntimeFunction(
6397             llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6398 false),
6399 "__get_kernel_preferred_work_group_size_multiple_impl"),
6400 {Kernel, Arg}));
6401 }
6402 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6403 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6404 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6405 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6406 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6407 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6408 auto Info =
6409 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6410 Value *Kernel =
6411 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6412 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6413 const char *Name =
6414 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6415 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6416             : "__get_kernel_sub_group_count_for_ndrange_impl";
6417     return RValue::get(EmitRuntimeCall(
6418         CGM.CreateRuntimeFunction(
6419             llvm::FunctionType::get(
6420 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6421 false),
6422 Name),
6423 {NDRange, Kernel, Block}));
6424 }
6425 case Builtin::BI__builtin_store_half:
6426 case Builtin::BI__builtin_store_halff: {
6427     Value *Val = EmitScalarExpr(E->getArg(0));
6428     Address Address = EmitPointerWithAlignment(E->getArg(1));
6429     Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6430 Builder.CreateStore(HalfVal, Address);
6431 return RValue::get(nullptr);
6432 }
6433   case Builtin::BI__builtin_load_half: {
6434     Address Address = EmitPointerWithAlignment(E->getArg(0));
6435     Value *HalfVal = Builder.CreateLoad(Address);
6436 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6437 }
6438   case Builtin::BI__builtin_load_halff: {
6439     Address Address = EmitPointerWithAlignment(E->getArg(0));
6440     Value *HalfVal = Builder.CreateLoad(Address);
6441 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6442 }
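   // Usage note (a sketch): __builtin_store_half/__builtin_store_halff truncate
   // a double/float to a 16-bit half before storing, and __builtin_load_half/
   // __builtin_load_halff widen the loaded half back to double/float; only the
   // fptrunc/fpext and the memory access are emitted, never half arithmetic.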
6443 case Builtin::BI__builtin_printf:
6444 case Builtin::BIprintf:
6445 if (getTarget().getTriple().isNVPTX() ||
6446 getTarget().getTriple().isAMDGCN() ||
6447 (getTarget().getTriple().isSPIRV() &&
6448 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6449       if (getTarget().getTriple().isNVPTX())
6450         return EmitNVPTXDevicePrintfCallExpr(E);
6451       if ((getTarget().getTriple().isAMDGCN() ||
6452            getTarget().getTriple().isSPIRV()) &&
6453           getLangOpts().HIP)
6454         return EmitAMDGPUDevicePrintfCallExpr(E);
6455     }
6456
6457 break;
6458 case Builtin::BI__builtin_canonicalize:
6459 case Builtin::BI__builtin_canonicalizef:
6460 case Builtin::BI__builtin_canonicalizef16:
6461 case Builtin::BI__builtin_canonicalizel:
6462 return RValue::get(
6463 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6464
6465 case Builtin::BI__builtin_thread_pointer: {
6466 if (!getContext().getTargetInfo().isTLSSupported())
6467 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6468 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6469 break;
6470 }
6471 case Builtin::BI__builtin_os_log_format:
6472 return emitBuiltinOSLogFormat(*E);
6473
6474   case Builtin::BI__xray_customevent: {
6475     if (!ShouldXRayInstrumentFunction())
6476       return RValue::getIgnored();
6477
6478     if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6479             XRayInstrKind::Custom))
6480       return RValue::getIgnored();
6481
6482 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6483 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6484 return RValue::getIgnored();
6485
6486 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6487 auto FTy = F->getFunctionType();
6488 auto Arg0 = E->getArg(0);
6489 auto Arg0Val = EmitScalarExpr(Arg0);
6490 auto Arg0Ty = Arg0->getType();
6491 auto PTy0 = FTy->getParamType(0);
6492 if (PTy0 != Arg0Val->getType()) {
6493 if (Arg0Ty->isArrayType())
6494 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6495 else
6496 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6497 }
6498 auto Arg1 = EmitScalarExpr(E->getArg(1));
6499 auto PTy1 = FTy->getParamType(1);
6500 if (PTy1 != Arg1->getType())
6501 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6502 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6503 }
6504
6505 case Builtin::BI__xray_typedevent: {
6506 // TODO: There should be a way to always emit events even if the current
6507 // function is not instrumented. Losing events in a stream can cripple
6508     // a trace.
6509     if (!ShouldXRayInstrumentFunction())
6510       return RValue::getIgnored();
6511
6512     if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6513             XRayInstrKind::Typed))
6514       return RValue::getIgnored();
6515
6516 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6517 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6518 return RValue::getIgnored();
6519
6520 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6521 auto FTy = F->getFunctionType();
6522 auto Arg0 = EmitScalarExpr(E->getArg(0));
6523 auto PTy0 = FTy->getParamType(0);
6524 if (PTy0 != Arg0->getType())
6525 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6526 auto Arg1 = E->getArg(1);
6527 auto Arg1Val = EmitScalarExpr(Arg1);
6528 auto Arg1Ty = Arg1->getType();
6529 auto PTy1 = FTy->getParamType(1);
6530 if (PTy1 != Arg1Val->getType()) {
6531 if (Arg1Ty->isArrayType())
6532 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6533 else
6534 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6535 }
6536 auto Arg2 = EmitScalarExpr(E->getArg(2));
6537 auto PTy2 = FTy->getParamType(2);
6538 if (PTy2 != Arg2->getType())
6539 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6540 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6541 }
6542
6543 case Builtin::BI__builtin_ms_va_start:
6544 case Builtin::BI__builtin_ms_va_end:
6545     return RValue::get(
6546         EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6547                        BuiltinID == Builtin::BI__builtin_ms_va_start));
6548
6549 case Builtin::BI__builtin_ms_va_copy: {
6550 // Lower this manually. We can't reliably determine whether or not any
6551 // given va_copy() is for a Win64 va_list from the calling convention
6552 // alone, because it's legal to do this from a System V ABI function.
6553 // With opaque pointer types, we won't have enough information in LLVM
6554 // IR to determine this from the argument types, either. Best to do it
6555 // now, while we have enough information.
6556 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6557 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6558
6559 DestAddr = DestAddr.withElementType(Int8PtrTy);
6560 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6561
6562 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6563 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6564 }
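   // In effect (a sketch): because a Win64 va_list is just a char*, the copy is
   // a single pointer-sized load from the source va_list followed by a store
   // into the destination, with no runtime call.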
6565
6566 case Builtin::BI__builtin_get_device_side_mangled_name: {
6567 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6568 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6569 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6570 return RValue::get(Str.getPointer());
6571 }
6572 }
6573
6574 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6575 // the call using the normal call path, but using the unmangled
6576 // version of the function name.
6577 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6578 return emitLibraryCall(*this, FD, E,
6579 CGM.getBuiltinLibFunction(FD, BuiltinID));
6580
6581 // If this is a predefined lib function (e.g. malloc), emit the call
6582 // using exactly the normal call path.
6583 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6584 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6585
6586 // Check that a call to a target specific builtin has the correct target
6587 // features.
6588   // This is down here so that non-target-specific builtins skip the check;
6589   // however, if generic builtins start to require generic target features
6590   // then we can move this up to the beginning of the function.
6591   checkTargetFeatures(E, FD);
6592
6593 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6594 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6595
6596 // See if we have a target specific intrinsic.
6597 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6598 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6599 StringRef Prefix =
6600 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6601 if (!Prefix.empty()) {
6602 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6603 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6604 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6605 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6606     // NOTE: we don't need to perform a compatibility flag check here since
6607     // the intrinsics are declared in Builtins*.def via LANGBUILTIN, which
6608     // gates the MS builtins on ALL_MS_LANGUAGES, so incompatible uses are
6609     // filtered out earlier.
6609 if (IntrinsicID == Intrinsic::not_intrinsic)
6610 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6611 }
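   // Example (a sketch): on x86 targets the prefix is "x86", so any builtin
   // named in a ClangBuiltin<"..."> entry of IntrinsicsX86.td resolves directly
   // to that LLVM intrinsic here; the MS-builtin table is consulted only when
   // that first lookup fails.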
6612
6613   if (IntrinsicID != Intrinsic::not_intrinsic) {
6614     SmallVector<Value *, 16> Args;
6615
6616 // Find out if any arguments are required to be integer constant
6617 // expressions.
6618     unsigned ICEArguments = 0;
6619     ASTContext::GetBuiltinTypeError Error;
6620     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6621 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6622
6623 Function *F = CGM.getIntrinsic(IntrinsicID);
6624 llvm::FunctionType *FTy = F->getFunctionType();
6625
6626 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6627 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6628 // If the intrinsic arg type is different from the builtin arg type
6629 // we need to do a bit cast.
6630 llvm::Type *PTy = FTy->getParamType(i);
6631 if (PTy != ArgValue->getType()) {
6632 // XXX - vector of pointers?
6633 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6634 if (PtrTy->getAddressSpace() !=
6635 ArgValue->getType()->getPointerAddressSpace()) {
6636 ArgValue = Builder.CreateAddrSpaceCast(
6637 ArgValue, llvm::PointerType::get(getLLVMContext(),
6638 PtrTy->getAddressSpace()));
6639 }
6640 }
6641
6642       // Cast a vector type (e.g., v256i32) to x86_amx; this only happens
6643       // in AMX intrinsics.
6644 if (PTy->isX86_AMXTy())
6645 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6646 {ArgValue->getType()}, {ArgValue});
6647 else
6648 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6649 }
6650
6651 Args.push_back(ArgValue);
6652 }
6653
6654 Value *V = Builder.CreateCall(F, Args);
6655 QualType BuiltinRetType = E->getType();
6656
6657 llvm::Type *RetTy = VoidTy;
6658 if (!BuiltinRetType->isVoidType())
6659 RetTy = ConvertType(BuiltinRetType);
6660
6661 if (RetTy != V->getType()) {
6662 // XXX - vector of pointers?
6663 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6664       if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6665         V = Builder.CreateAddrSpaceCast(
6666             V, llvm::PointerType::get(getLLVMContext(),
6667 PtrTy->getAddressSpace()));
6668 }
6669 }
6670
6671     // Cast x86_amx to a vector type (e.g., v256i32); this only happens
6672     // in AMX intrinsics.
6673 if (V->getType()->isX86_AMXTy())
6674 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6675 {V});
6676 else
6677 V = Builder.CreateBitCast(V, RetTy);
6678 }
6679
6680 if (RetTy->isVoidTy())
6681 return RValue::get(nullptr);
6682
6683 return RValue::get(V);
6684 }
6685
6686 // Some target-specific builtins can have aggregate return values, e.g.
6687 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6688 // ReturnValue to be non-null, so that the target-specific emission code can
6689   // always just emit into it.
6690   TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6691   if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6692 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6693 ReturnValue = ReturnValueSlot(DestPtr, false);
6694 }
6695
6696 // Now see if we can emit a target-specific builtin.
6697 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6698 switch (EvalKind) {
6699 case TEK_Scalar:
6700 if (V->getType()->isVoidTy())
6701 return RValue::get(nullptr);
6702 return RValue::get(V);
6703 case TEK_Aggregate:
6704 return RValue::getAggregate(ReturnValue.getAddress(),
6705 ReturnValue.isVolatile());
6706 case TEK_Complex:
6707 llvm_unreachable("No current target builtin returns complex");
6708 }
6709 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6710 }
6711
6712 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6713 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6714 switch (EvalKind) {
6715 case TEK_Scalar:
6716 if (V->getType()->isVoidTy())
6717 return RValue::get(nullptr);
6718 return RValue::get(V);
6719 case TEK_Aggregate:
6720 return RValue::getAggregate(ReturnValue.getAddress(),
6721 ReturnValue.isVolatile());
6722 case TEK_Complex:
6723 llvm_unreachable("No current hlsl builtin returns complex");
6724 }
6725 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6726 }
6727
6728 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6729 return EmitHipStdParUnsupportedBuiltin(this, FD);
6730
6731 ErrorUnsupported(E, "builtin function");
6732
6733 // Unknown builtin, for now just dump it out and return undef.
6734 return GetUndefRValue(E->getType());
6735}
6736
6737 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6738                                         unsigned BuiltinID, const CallExpr *E,
6739 ReturnValueSlot ReturnValue,
6740 llvm::Triple::ArchType Arch) {
6741   // When compiling in HipStdPar mode we have to be conservative about
6742   // rejecting target specific features in the FE, and defer the possible
6743   // error to the AcceleratorCodeSelection pass: only if an unsupported target
6744   // builtin is referenced by an accelerator executable function do we emit an
6745   // error. Returning nullptr here leads to the builtin being handled in
6746   // EmitHipStdParUnsupportedBuiltin.
6747 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6748 Arch != CGF->getTarget().getTriple().getArch())
6749 return nullptr;
6750
6751 switch (Arch) {
6752 case llvm::Triple::arm:
6753 case llvm::Triple::armeb:
6754 case llvm::Triple::thumb:
6755 case llvm::Triple::thumbeb:
6756 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6757 case llvm::Triple::aarch64:
6758 case llvm::Triple::aarch64_32:
6759 case llvm::Triple::aarch64_be:
6760 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6761 case llvm::Triple::bpfeb:
6762 case llvm::Triple::bpfel:
6763 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6764 case llvm::Triple::x86:
6765 case llvm::Triple::x86_64:
6766 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6767 case llvm::Triple::ppc:
6768 case llvm::Triple::ppcle:
6769 case llvm::Triple::ppc64:
6770 case llvm::Triple::ppc64le:
6771 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6772 case llvm::Triple::r600:
6773 case llvm::Triple::amdgcn:
6774 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6775 case llvm::Triple::systemz:
6776 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6777 case llvm::Triple::nvptx:
6778 case llvm::Triple::nvptx64:
6779 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6780 case llvm::Triple::wasm32:
6781 case llvm::Triple::wasm64:
6782 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6783 case llvm::Triple::hexagon:
6784 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6785 case llvm::Triple::riscv32:
6786 case llvm::Triple::riscv64:
6787 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6788 case llvm::Triple::spirv:
6789 return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
6790 case llvm::Triple::spirv64:
6791 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6792 return nullptr;
6793 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6794 default:
6795 return nullptr;
6796 }
6797}
6798
6799 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6800                                               const CallExpr *E,
6801 ReturnValueSlot ReturnValue) {
6802 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6803     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6804     return EmitTargetArchBuiltinExpr(
6805         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6806 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6807 }
6808
6809 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6810 getTarget().getTriple().getArch());
6811}
6812
6813static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6814 NeonTypeFlags TypeFlags,
6815 bool HasLegalHalfType = true,
6816 bool V1Ty = false,
6817 bool AllowBFloatArgsAndRet = true) {
6818 int IsQuad = TypeFlags.isQuad();
6819   switch (TypeFlags.getEltType()) {
6820   case NeonTypeFlags::Int8:
6821   case NeonTypeFlags::Poly8:
6822     return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6823   case NeonTypeFlags::Int16:
6824   case NeonTypeFlags::Poly16:
6825     return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6826   case NeonTypeFlags::BFloat16:
6827     if (AllowBFloatArgsAndRet)
6828       return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6829     else
6830       return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6831   case NeonTypeFlags::Float16:
6832     if (HasLegalHalfType)
6833       return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6834     else
6835       return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6836   case NeonTypeFlags::Int32:
6837     return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6838   case NeonTypeFlags::Int64:
6839   case NeonTypeFlags::Poly64:
6840     return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6841   case NeonTypeFlags::Poly128:
6842     // FIXME: i128 and f128 don't get full support in Clang and LLVM;
6843     // a lot of the i128 and f128 API is missing,
6844     // so we use v16i8 to represent poly128 and let it get pattern matched.
6845     return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6846   case NeonTypeFlags::Float32:
6847     return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6848   case NeonTypeFlags::Float64:
6849     return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6850 }
6851 llvm_unreachable("Unknown vector element type!");
6852}
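 // Quick reference (a sketch): the (element type, quad) pair selects the LLVM
 // vector type, e.g. Int32 yields <2 x i32> for a 64-bit "d" register and
 // <4 x i32> when the quad bit is set; V1Ty forces a one-element vector for
 // callers that need a scalar-in-vector type.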
6853
6854static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6855 NeonTypeFlags IntTypeFlags) {
6856 int IsQuad = IntTypeFlags.isQuad();
6857   switch (IntTypeFlags.getEltType()) {
6858   case NeonTypeFlags::Int16:
6859     return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6860   case NeonTypeFlags::Int32:
6861     return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6862   case NeonTypeFlags::Int64:
6863     return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6864 default:
6865 llvm_unreachable("Type can't be converted to floating-point!");
6866 }
6867}
6868
6869 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6870                                       const ElementCount &Count) {
6871 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6872 return Builder.CreateShuffleVector(V, V, SV, "lane");
6873}
6874
6875 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6876   ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6877 return EmitNeonSplat(V, C, EC);
6878}
6879
6880 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6881                                      const char *name,
6882 unsigned shift, bool rightshift) {
6883 unsigned j = 0;
6884 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6885 ai != ae; ++ai, ++j) {
6886 if (F->isConstrainedFPIntrinsic())
6887 if (ai->getType()->isMetadataTy())
6888 continue;
6889 if (shift > 0 && shift == j)
6890 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6891 else
6892 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6893 }
6894
6895 if (F->isConstrainedFPIntrinsic())
6896 return Builder.CreateConstrainedFPCall(F, Ops, name);
6897 else
6898 return Builder.CreateCall(F, Ops, name);
6899}
6900
6901 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6902                                             bool neg) {
6903 int SV = cast<ConstantInt>(V)->getSExtValue();
6904 return ConstantInt::get(Ty, neg ? -SV : SV);
6905}
6906
6907// Right-shift a vector by a constant.
6909 llvm::Type *Ty, bool usgn,
6910 const char *name) {
6911 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6912
6913 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6914 int EltSize = VTy->getScalarSizeInBits();
6915
6916 Vec = Builder.CreateBitCast(Vec, Ty);
6917
6918 // lshr/ashr are undefined when the shift amount is equal to the vector
6919 // element size.
6920 if (ShiftAmt == EltSize) {
6921 if (usgn) {
6922 // Right-shifting an unsigned value by its size yields 0.
6923 return llvm::ConstantAggregateZero::get(VTy);
6924 } else {
6925 // Right-shifting a signed value by its size is equivalent
6926 // to a shift of size-1.
6927 --ShiftAmt;
6928 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6929 }
6930 }
6931
6932 Shift = EmitNeonShiftVector(Shift, Ty, false);
6933 if (usgn)
6934 return Builder.CreateLShr(Vec, Shift, name);
6935 else
6936 return Builder.CreateAShr(Vec, Shift, name);
6937}
6938
6939enum {
6940 AddRetType = (1 << 0),
6941 Add1ArgType = (1 << 1),
6942 Add2ArgTypes = (1 << 2),
6943
6944   VectorizeRetType = (1 << 3),
6945   VectorizeArgTypes = (1 << 4),
6946
6947   InventFloatType = (1 << 5),
6948   UnsignedAlts = (1 << 6),
6949
6950   Use64BitVectors = (1 << 7),
6951   Use128BitVectors = (1 << 8),
6952
6953   Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6954   VectorRet = AddRetType | VectorizeRetType,
6955   VectorRetGetArgs01 =
6956       AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6957   FpCmpzModifiers =
6958       AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6959 };
6960
6961namespace {
6962struct ARMVectorIntrinsicInfo {
6963 const char *NameHint;
6964 unsigned BuiltinID;
6965 unsigned LLVMIntrinsic;
6966   unsigned AltLLVMIntrinsic;
6967   uint64_t TypeModifier;
6968
6969 bool operator<(unsigned RHSBuiltinID) const {
6970 return BuiltinID < RHSBuiltinID;
6971 }
6972 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6973 return BuiltinID < TE.BuiltinID;
6974 }
6975};
6976} // end anonymous namespace
6977
6978#define NEONMAP0(NameBase) \
6979 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6980
6981#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6982 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6983 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6984
6985#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6986 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6987 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6988 TypeModifier }
6989
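 // Reading the tables below (a sketch): an entry such as
 //   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
 // maps __builtin_neon_vabd_v to Intrinsic::arm_neon_vabdu with
 // Intrinsic::arm_neon_vabds as the signed alternative, and the TypeModifier
 // bits tell the shared emitter how to build the overloaded intrinsic types.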
6990static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6991 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6992 NEONMAP0(splat_lane_v),
6993 NEONMAP0(splat_laneq_v),
6994 NEONMAP0(splatq_lane_v),
6995 NEONMAP0(splatq_laneq_v),
6996 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6997 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6998 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6999 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
7000 NEONMAP0(vadd_v),
7001 NEONMAP0(vaddhn_v),
7002 NEONMAP0(vaddq_v),
7003 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
7004 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
7005 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
7006 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
7007 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
7008 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
7009 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
7010 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
7011 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
7012 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
7013 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
7014 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
7015 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
7016 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
7017 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
7018 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
7019 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
7020 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
7021 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
7022 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
7023 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
7024 NEONMAP1(vcage_v, arm_neon_vacge, 0),
7025 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
7026 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
7027 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
7028 NEONMAP1(vcale_v, arm_neon_vacge, 0),
7029 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
7030 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
7031 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
7032 NEONMAP0(vceqz_v),
7033 NEONMAP0(vceqzq_v),
7034 NEONMAP0(vcgez_v),
7035 NEONMAP0(vcgezq_v),
7036 NEONMAP0(vcgtz_v),
7037 NEONMAP0(vcgtzq_v),
7038 NEONMAP0(vclez_v),
7039 NEONMAP0(vclezq_v),
7040 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
7041 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
7042 NEONMAP0(vcltz_v),
7043 NEONMAP0(vcltzq_v),
7044 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7045 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7046 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7047 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7048 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
7049 NEONMAP0(vcvt_f16_s16),
7050 NEONMAP0(vcvt_f16_u16),
7051 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
7052 NEONMAP0(vcvt_f32_v),
7053 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7054 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7055 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7056 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7057 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7058 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7059 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7060 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7061 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7062 NEONMAP0(vcvt_s16_f16),
7063 NEONMAP0(vcvt_s32_v),
7064 NEONMAP0(vcvt_s64_v),
7065 NEONMAP0(vcvt_u16_f16),
7066 NEONMAP0(vcvt_u32_v),
7067 NEONMAP0(vcvt_u64_v),
7068 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
7069 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
7070 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
7071 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
7072 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
7073 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
7074 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
7075 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
7076 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
7077 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
7078 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
7079 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
7080 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
7081 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
7082 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
7083 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
7084 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
7085 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
7086 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
7087 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
7088 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
7089 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
7090 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
7091 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
7092 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
7093 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
7094 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
7095 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
7096 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
7097 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
7098 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
7099 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
7100 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
7101 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
7102 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
7103 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
7104 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
7105 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
7106 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
7107 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
7108 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
7109 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
7110 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
7111 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
7112 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
7113 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
7114 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
7115 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
7116 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
7117 NEONMAP0(vcvtq_f16_s16),
7118 NEONMAP0(vcvtq_f16_u16),
7119 NEONMAP0(vcvtq_f32_v),
7120 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7121 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7122 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7123 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7124 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7125 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7126 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7127 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7128 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7129 NEONMAP0(vcvtq_s16_f16),
7130 NEONMAP0(vcvtq_s32_v),
7131 NEONMAP0(vcvtq_s64_v),
7132 NEONMAP0(vcvtq_u16_f16),
7133 NEONMAP0(vcvtq_u32_v),
7134 NEONMAP0(vcvtq_u64_v),
7135 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
7136 NEONMAP1(vdot_u32, arm_neon_udot, 0),
7137 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
7138 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7139 NEONMAP0(vext_v),
7140 NEONMAP0(vextq_v),
7141 NEONMAP0(vfma_v),
7142 NEONMAP0(vfmaq_v),
7143 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7144 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7145 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7146 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7147 NEONMAP0(vld1_dup_v),
7148 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7149 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7150 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7151 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7152 NEONMAP0(vld1q_dup_v),
7153 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7154 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7155 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7156 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7157 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7158 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7159 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7160 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7161 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7162 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7163 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7164 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7165 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7166 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7167 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7168 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7169 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7170 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7171 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7172 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7173 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7174 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7175 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7176 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7177 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7178 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7179 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7180 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7181 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7182 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7183 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7184 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7185 NEONMAP0(vmovl_v),
7186 NEONMAP0(vmovn_v),
7187 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7188 NEONMAP0(vmull_v),
7189 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7190 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7191 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7192 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7193 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7194 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7195 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7196 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7197 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7198 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7199 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7200 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7201 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7202 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7203 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7204 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7205 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7206 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7207 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7208 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7209 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7210 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7211 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7212 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7213 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7214 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7215 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7216 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7217 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7218 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7219 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7220 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7221 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7222 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7223 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7224 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7225 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7226 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7227 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7228 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7229 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7230 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7231 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7232 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7233 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7234 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7235 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7236 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7237 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7238 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7239 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7240 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7241 NEONMAP0(vrndi_v),
7242 NEONMAP0(vrndiq_v),
7243 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7244 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7245 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7246 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7247 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7248 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7249 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7250 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7251 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7252 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7253 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7254 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7255 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7256 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7257 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7258 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7259 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7260 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7261 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7262 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7263 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7264 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7265 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7266 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7267 NEONMAP0(vshl_n_v),
7268 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7269 NEONMAP0(vshll_n_v),
7270 NEONMAP0(vshlq_n_v),
7271 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7272 NEONMAP0(vshr_n_v),
7273 NEONMAP0(vshrn_n_v),
7274 NEONMAP0(vshrq_n_v),
7275 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7276 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7277 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7278 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7279 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7280 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7281 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7282 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7283 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7284 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7285 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7286 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7287 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7288 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7289 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7290 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7291 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7292 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7293 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7294 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7295 NEONMAP0(vsubhn_v),
7296 NEONMAP0(vtrn_v),
7297 NEONMAP0(vtrnq_v),
7298 NEONMAP0(vtst_v),
7299 NEONMAP0(vtstq_v),
7300 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7301 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7302 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7303 NEONMAP0(vuzp_v),
7304 NEONMAP0(vuzpq_v),
7305 NEONMAP0(vzip_v),
7306 NEONMAP0(vzipq_v)
7307};
7308
7309static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7310 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
7311 NEONMAP0(splat_lane_v),
7312 NEONMAP0(splat_laneq_v),
7313 NEONMAP0(splatq_lane_v),
7314 NEONMAP0(splatq_laneq_v),
7315 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7316 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7317 NEONMAP0(vadd_v),
7318 NEONMAP0(vaddhn_v),
7319 NEONMAP0(vaddq_p128),
7320 NEONMAP0(vaddq_v),
7321 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7322 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7323 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7324 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7325 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7326 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7327 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7328 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7329 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7330 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7331 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7332 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7333 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7334 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7335 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7336 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7337 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7338 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7339 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7340 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7341 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7342 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7343 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7344 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7345 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7346 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7347 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7348 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7349 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7350 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7351 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7352 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7353 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7354 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7355 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7356 NEONMAP0(vceqz_v),
7357 NEONMAP0(vceqzq_v),
7358 NEONMAP0(vcgez_v),
7359 NEONMAP0(vcgezq_v),
7360 NEONMAP0(vcgtz_v),
7361 NEONMAP0(vcgtzq_v),
7362 NEONMAP0(vclez_v),
7363 NEONMAP0(vclezq_v),
7364 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7365 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7366 NEONMAP0(vcltz_v),
7367 NEONMAP0(vcltzq_v),
7368 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7369 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7370 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7371 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7372 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7373 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7374 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7375 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7376 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7377 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7378 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7379 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7380 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7381 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7382 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7383 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7384 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7385 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7386 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7387 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7388 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7389 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7390 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7391 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7392 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7393 NEONMAP0(vcvt_f16_s16),
7394 NEONMAP0(vcvt_f16_u16),
7395 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7396 NEONMAP0(vcvt_f32_v),
7397 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7398 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7399 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7400 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7401 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7402 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7403 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7404 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7405 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7406 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7407 NEONMAP0(vcvtq_f16_s16),
7408 NEONMAP0(vcvtq_f16_u16),
7409 NEONMAP0(vcvtq_f32_v),
7410 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
7411 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7412 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7413 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7414 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7415 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7416 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7417 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7418 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7419 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7420 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7421 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7422 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7423 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7424 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7425 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7426 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7427 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7428 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7429 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7430 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7431 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7432 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7433 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7434 NEONMAP0(vext_v),
7435 NEONMAP0(vextq_v),
7436 NEONMAP0(vfma_v),
7437 NEONMAP0(vfmaq_v),
7438 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7439 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7440 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7441 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7442 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7443 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7444 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7445 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7446 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7447 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7448 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7449 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7450 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7451 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7452 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7453 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7454 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7455 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7456 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7457 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7458 NEONMAP0(vmovl_v),
7459 NEONMAP0(vmovn_v),
7460 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7461 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7462 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7463 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7464 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7465 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7466 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7467 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7468 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7469 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7470 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7471 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7472 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7473 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7474 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7475 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7476 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7477 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7478 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7479 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7480 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7481 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7482 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7483 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7484 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7485 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7486 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7487 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7488 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7489 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7490 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7491 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7492 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7493 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7494 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7495 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7496 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7497 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7498 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7499 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7500 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7501 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7502 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7503 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7504 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7505 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7506 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7507 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7508 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7509 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7510 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7511 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7512 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7513 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7514 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7515 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7516 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7517 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7518 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7519 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7520 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7521 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7522 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7523 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7524 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7525 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7526 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7527 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7528 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7529 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7530 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7531 NEONMAP0(vrndi_v),
7532 NEONMAP0(vrndiq_v),
7533 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7534 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7535 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7536 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7537 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7538 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7539 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7540 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7541 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7542 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7543 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7544 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7545 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7546 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7547 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7548 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7549 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7550 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7551 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7552 NEONMAP0(vshl_n_v),
7553 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7554 NEONMAP0(vshll_n_v),
7555 NEONMAP0(vshlq_n_v),
7556 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7557 NEONMAP0(vshr_n_v),
7558 NEONMAP0(vshrn_n_v),
7559 NEONMAP0(vshrq_n_v),
7560 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7561 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7562 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7563 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7564 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7565 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7566 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7567 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7568 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7569 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7570 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7571 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7572 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7573 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7574 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7575 NEONMAP0(vsubhn_v),
7576 NEONMAP0(vtst_v),
7577 NEONMAP0(vtstq_v),
7578 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7579 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7580 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7581 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7582};
7583
7584static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7585 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7586 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7587 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7588 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7589 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7590 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7591 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7592 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7593 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7594 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7595 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7596 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7597 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7598 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7599 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7600 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7601 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7602 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7603 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7604 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7605 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7606 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7607 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7608 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7609 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7610 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7611 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7612 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7613 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7614 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7615 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7616 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7617 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7618 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7619 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7620 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7621 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7622 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7623 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7624 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7625 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7626 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7627 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7628 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7629 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7630 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7631 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7632 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7633 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7634 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7635 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7636 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7637 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7638 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7639 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7640 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7641 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7642 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7643 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7644 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7645 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7646 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7647 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7648 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7649 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7650 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7651 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7652 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7653 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7654 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7655 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7656 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7657 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7658 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7659 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7660 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7661 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7662 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7663 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7664 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7665 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7666 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7667 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7668 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7669 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7670 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7671 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7672 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7673 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7674 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7675 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7676 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7677 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7678 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7679 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7680 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7681 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7682 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7683 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7684 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7685 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7686 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7687 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7688 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7689 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7690 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7691 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7692 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7693 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7694 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7695 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7696 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7697 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7698 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7699 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7700 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7701 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7702 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7703 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7704 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7705 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7706 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7707 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7708 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7709 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7710 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7711 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7712 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7713 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7714 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7715 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7716 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7717 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7718 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7719 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7720 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7721 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7722 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7723 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7724 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7725 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7726 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7727 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7728 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7729 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7730 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7731 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7732 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7733 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7734 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7735 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7736 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7737 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7738 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7739 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7740 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7741 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7742 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7743 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7744 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7745 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7746 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7747 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7748 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7749 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7750 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7751 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7752 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7753 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7754 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7755 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7756 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7757 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7758 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7759 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7760 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7761 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7762 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7763 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7764 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7765 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7766 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7767 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7768 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7769 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7770 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7771 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7772 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7773 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7774 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7775 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7776 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7777 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7778 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7779 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7780 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7781 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7782 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7783 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7784 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7785 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7786 // FP16 scalar intrinsics go here.
7787 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7788 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7789 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7790 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7791 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7792 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7793 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7794 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7795 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7796 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7797 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7798 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7799 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7800 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7801 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7802 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7803 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7804 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7805 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7806 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7807 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7808 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7809 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7810 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7811 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7812 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7813 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7814 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7815 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7816 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7817 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7818 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7819 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7820 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7821};
7822
7823// Some intrinsics are equivalent for codegen.
7824static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7825 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7826 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7827 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7828 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7829 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7830 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7831 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7832 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7833 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7834 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7835 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7836 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7837 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7838 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7839 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7840 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7841 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7842 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7843 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7844 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7845 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7846 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7847 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7848 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7849 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7850 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7851 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7852 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7853 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7854 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7855 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7856 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7857 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7858 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7859 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7860 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7861 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7862 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7863 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7864 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7865 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7866 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7867 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7868 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7869 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7870 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7871 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7872 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7873 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7874 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7875 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7876 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7877 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7878 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7879 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7880 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7881 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7882 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7883 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7884 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7885 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7886 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7887 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7888 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7889 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7890 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7891 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7892 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7893 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7894 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7895 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7896 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7897 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7898 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7899 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7900 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7901 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7902 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7903 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7904 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7905 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7906 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7907 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7908 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7909 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7910 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7911 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7912 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7913 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7914 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7915 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7916 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7917 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7918 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7919 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7920 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7921 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7922 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7923 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7924 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7925 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7926 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7927 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7928 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7929 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7930 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7931 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7932 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7933 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7934 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7935 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7936 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7937 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7938 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7939 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7940 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7941 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7942 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7943 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7944 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7945 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7946 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7947 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7948 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7949 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7950 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7951 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7952 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7953 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7954 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7955 // arbitrary one to be handled as the canonical variation.
7956 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7957 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7958 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7959 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7960 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7961 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7962 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7963 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7964 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7965 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7966 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7967 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7968};
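// A minimal sketch (not part of the listing above) of how this equivalence
// table can be used: remap an incoming builtin ID to its canonical "_v" form
// before the per-target intrinsic maps are searched. The helper name is
// hypothetical; the real call sites may perform this lookup inline.
static unsigned canonicalizeEquivalentNeonBuiltinID(unsigned BuiltinID) {
  auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](const auto &P) {
    return P.first == BuiltinID;
  });
  if (It != std::end(NEONEquivalentIntrinsicMap))
    return It->second; // e.g. vabs_f16 is emitted exactly like vabs_v.
  return BuiltinID;
}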
7969
7970#undef NEONMAP0
7971#undef NEONMAP1
7972#undef NEONMAP2
7973
7974#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7975 { \
7976 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7977 TypeModifier \
7978 }
7979
7980#define SVEMAP2(NameBase, TypeModifier) \
7981 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
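// For illustration only: SVEMAP1(svfoo, aarch64_sve_foo, Flags) expands to the
// initializer { "svfoo", SVE::BI__builtin_sve_svfoo, Intrinsic::aarch64_sve_foo,
// 0, Flags }, while SVEMAP2 leaves the LLVM intrinsic slot as 0, presumably for
// builtins that get custom codegen. The TableGen-generated .inc files included
// below instantiate one such entry per SVE builtin (svfoo is a placeholder
// name, not a real builtin).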
7982static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7983#define GET_SVE_LLVM_INTRINSIC_MAP
7984#include "clang/Basic/arm_sve_builtin_cg.inc"
7985#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7986#undef GET_SVE_LLVM_INTRINSIC_MAP
7987};
7988
7989#undef SVEMAP1
7990#undef SVEMAP2
7991
7992#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7993 { \
7994 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7995 TypeModifier \
7996 }
7997
7998#define SMEMAP2(NameBase, TypeModifier) \
7999 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
8000static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
8001#define GET_SME_LLVM_INTRINSIC_MAP
8002#include "clang/Basic/arm_sme_builtin_cg.inc"
8003#undef GET_SME_LLVM_INTRINSIC_MAP
8004};
8005
8006#undef SMEMAP1
8007#undef SMEMAP2
8008
8009 static bool NEONSIMDIntrinsicsProvenSorted = false;
8010
8011 static bool AArch64SIMDIntrinsicsProvenSorted = false;
8012 static bool AArch64SISDIntrinsicsProvenSorted = false;
8013 static bool AArch64SVEIntrinsicsProvenSorted = false;
8014 static bool AArch64SMEIntrinsicsProvenSorted = false;
8015
8016 static const ARMVectorIntrinsicInfo *
8017 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
8018 unsigned BuiltinID, bool &MapProvenSorted) {
8019
8020#ifndef NDEBUG
8021 if (!MapProvenSorted) {
8022 assert(llvm::is_sorted(IntrinsicMap));
8023 MapProvenSorted = true;
8024 }
8025#endif
8026
8027 const ARMVectorIntrinsicInfo *Builtin =
8028 llvm::lower_bound(IntrinsicMap, BuiltinID);
8029
8030 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
8031 return Builtin;
8032
8033 return nullptr;
8034}
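// An assumed usage sketch (the exact call sites are elsewhere in this file):
// each table is paired with a file-static "proven sorted" flag so the
// is_sorted assertion above runs at most once per map, e.g.
//
//   const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
//       AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted);
//   if (Builtin)
//     // ...emit using Builtin->LLVMIntrinsic and Builtin->TypeModifier...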
8035
8036 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
8037 unsigned Modifier,
8038 llvm::Type *ArgType,
8039 const CallExpr *E) {
8040 int VectorSize = 0;
8041 if (Modifier & Use64BitVectors)
8042 VectorSize = 64;
8043 else if (Modifier & Use128BitVectors)
8044 VectorSize = 128;
8045
8046 // Return type.
8047 SmallVector<llvm::Type *, 3> Tys;
8048 if (Modifier & AddRetType) {
8049 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
8050 if (Modifier & VectorizeRetType)
8051 Ty = llvm::FixedVectorType::get(
8052 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
8053
8054 Tys.push_back(Ty);
8055 }
8056
8057 // Arguments.
8058 if (Modifier & VectorizeArgTypes) {
8059 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
8060 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
8061 }
8062
8063 if (Modifier & (Add1ArgType | Add2ArgTypes))
8064 Tys.push_back(ArgType);
8065
8066 if (Modifier & Add2ArgTypes)
8067 Tys.push_back(ArgType);
8068
8069 if (Modifier & InventFloatType)
8070 Tys.push_back(FloatTy);
8071
8072 return CGM.getIntrinsic(IntrinsicID, Tys);
8073}
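// Worked example (illustrative, derived from the tables above): vaddlv_s32 is
// mapped to aarch64_neon_saddlv with AddRetType | Add1ArgType, so Tys collects
// { scalar return type, <2 x i32> argument type } and CGM.getIntrinsic returns
// the declaration overloaded on exactly those two types. Flags such as
// Use64BitVectors combined with VectorizeArgTypes instead widen a scalar
// argument type to a 64-bit vector type before the overload is chosen.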
8074
8075 static Value *EmitCommonNeonSISDBuiltinExpr(
8076 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
8077 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
8078 unsigned BuiltinID = SISDInfo.BuiltinID;
8079 unsigned int Int = SISDInfo.LLVMIntrinsic;
8080 unsigned Modifier = SISDInfo.TypeModifier;
8081 const char *s = SISDInfo.NameHint;
8082
8083 switch (BuiltinID) {
8084 case NEON::BI__builtin_neon_vcled_s64:
8085 case NEON::BI__builtin_neon_vcled_u64:
8086 case NEON::BI__builtin_neon_vcles_f32:
8087 case NEON::BI__builtin_neon_vcled_f64:
8088 case NEON::BI__builtin_neon_vcltd_s64:
8089 case NEON::BI__builtin_neon_vcltd_u64:
8090 case NEON::BI__builtin_neon_vclts_f32:
8091 case NEON::BI__builtin_neon_vcltd_f64:
8092 case NEON::BI__builtin_neon_vcales_f32:
8093 case NEON::BI__builtin_neon_vcaled_f64:
8094 case NEON::BI__builtin_neon_vcalts_f32:
8095 case NEON::BI__builtin_neon_vcaltd_f64:
8096 // Only one direction of comparisons actually exists: cmle is just a cmge
8097 // with swapped operands. The table gives us the right intrinsic, but we
8098 // still need to do the swap.
8099 std::swap(Ops[0], Ops[1]);
8100 break;
8101 }
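// For example, vcalts_f32(a, b) is mapped above to aarch64_neon_facgt, so it
// reaches this point with its operands already swapped and is emitted as the
// "greater than" form facgt(b, a).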
8102
8103 assert(Int && "Generic code assumes a valid intrinsic");
8104
8105 // Determine the type(s) of this overloaded AArch64 intrinsic.
8106 const Expr *Arg = E->getArg(0);
8107 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
8108 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
8109
8110 int j = 0;
8111 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
8112 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
8113 ai != ae; ++ai, ++j) {
8114 llvm::Type *ArgTy = ai->getType();
8115 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
8116 ArgTy->getPrimitiveSizeInBits())
8117 continue;
8118
8119 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
8120 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8121 // it before inserting.
8122 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
8123 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
8124 Ops[j] =
8125 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
8126 }
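// For example, the loop above turns the i8 operands of vqaddb_s8 (mapped with
// Vectorize1ArgType | Use64BitVectors) into lane 0 of poison <8 x i8> vectors
// (and truncates the Int32Ty immediate of an _n_ builtin to the element type
// first), so the scalar builtin can reuse the 64-bit-vector intrinsic.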
8127
8128 Value *Result = CGF.EmitNeonCall(F, Ops, s);
8129 llvm::Type *ResultType = CGF.ConvertType(E->getType());
8130 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
8131 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
8132 return CGF.Builder.CreateExtractElement(Result, C0);
8133
8134 return CGF.Builder.CreateBitCast(Result, ResultType, s);
8135}
8136
8137 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
8138 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
8139 const char *NameHint, unsigned Modifier, const CallExpr *E,
8140 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8141 llvm::Triple::ArchType Arch) {
8142 // Get the last argument, which specifies the vector type.
8143 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8144 std::optional<llvm::APSInt> NeonTypeConst =
8145 Arg->getIntegerConstantExpr(getContext());
8146 if (!NeonTypeConst)
8147 return nullptr;
8148
8149 // Determine the type of this overloaded NEON intrinsic.
8150 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8151 bool Usgn = Type.isUnsigned();
8152 bool Quad = Type.isQuad();
8153 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8154 const bool AllowBFloatArgsAndRet =
8155 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8156
8157 llvm::FixedVectorType *VTy =
8158 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8159 llvm::Type *Ty = VTy;
8160 if (!Ty)
8161 return nullptr;
8162
8163 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8164 return Builder.getInt32(addr.getAlignment().getQuantity());
8165 };
8166
8167 unsigned Int = LLVMIntrinsic;
8168 if ((Modifier & UnsignedAlts) && !Usgn)
8169 Int = AltLLVMIntrinsic;
8170
8171 switch (BuiltinID) {
8172 default: break;
8173 case NEON::BI__builtin_neon_splat_lane_v:
8174 case NEON::BI__builtin_neon_splat_laneq_v:
8175 case NEON::BI__builtin_neon_splatq_lane_v:
8176 case NEON::BI__builtin_neon_splatq_laneq_v: {
8177 auto NumElements = VTy->getElementCount();
8178 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8179 NumElements = NumElements * 2;
8180 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8181 NumElements = NumElements.divideCoefficientBy(2);
8182
8183 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8184 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8185 }
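// For instance, splatq_lane_v broadcasts one lane of a 64-bit input vector
// across a result with twice as many elements (a 128-bit vector), while
// splat_laneq_v selects a lane of a 128-bit input and halves the element
// count; EmitNeonSplat then materializes the broadcast as a shufflevector
// splat.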
8186 case NEON::BI__builtin_neon_vpadd_v:
8187 case NEON::BI__builtin_neon_vpaddq_v:
8188 // We don't allow fp/int overloading of intrinsics.
8189 if (VTy->getElementType()->isFloatingPointTy() &&
8190 Int == Intrinsic::aarch64_neon_addp)
8191 Int = Intrinsic::aarch64_neon_faddp;
8192 break;
8193 case NEON::BI__builtin_neon_vabs_v:
8194 case NEON::BI__builtin_neon_vabsq_v:
8195 if (VTy->getElementType()->isFloatingPointTy())
8196 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8197 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8198 case NEON::BI__builtin_neon_vadd_v:
8199 case NEON::BI__builtin_neon_vaddq_v: {
8200 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8201 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8202 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8203 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8204 return Builder.CreateBitCast(Ops[0], Ty);
8205 }
8206 case NEON::BI__builtin_neon_vaddhn_v: {
8207 llvm::FixedVectorType *SrcTy =
8208 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8209
8210 // %sum = add <4 x i32> %lhs, %rhs
8211 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8212 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8213 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8214
8215 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8216 Constant *ShiftAmt =
8217 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8218 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8219
8220 // %res = trunc <4 x i32> %high to <4 x i16>
8221 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8222 }
8223 case NEON::BI__builtin_neon_vcale_v:
8224 case NEON::BI__builtin_neon_vcaleq_v:
8225 case NEON::BI__builtin_neon_vcalt_v:
8226 case NEON::BI__builtin_neon_vcaltq_v:
8227 std::swap(Ops[0], Ops[1]);
8228 [[fallthrough]];
8229 case NEON::BI__builtin_neon_vcage_v:
8230 case NEON::BI__builtin_neon_vcageq_v:
8231 case NEON::BI__builtin_neon_vcagt_v:
8232 case NEON::BI__builtin_neon_vcagtq_v: {
8233 llvm::Type *Ty;
8234 switch (VTy->getScalarSizeInBits()) {
8235 default: llvm_unreachable("unexpected type");
8236 case 32:
8237 Ty = FloatTy;
8238 break;
8239 case 64:
8240 Ty = DoubleTy;
8241 break;
8242 case 16:
8243 Ty = HalfTy;
8244 break;
8245 }
8246 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8247 llvm::Type *Tys[] = { VTy, VecFlt };
8248 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8249 return EmitNeonCall(F, Ops, NameHint);
8250 }
8251 case NEON::BI__builtin_neon_vceqz_v:
8252 case NEON::BI__builtin_neon_vceqzq_v:
8253 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8254 ICmpInst::ICMP_EQ, "vceqz");
8255 case NEON::BI__builtin_neon_vcgez_v:
8256 case NEON::BI__builtin_neon_vcgezq_v:
8257 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8258 ICmpInst::ICMP_SGE, "vcgez");
8259 case NEON::BI__builtin_neon_vclez_v:
8260 case NEON::BI__builtin_neon_vclezq_v:
8261 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8262 ICmpInst::ICMP_SLE, "vclez");
8263 case NEON::BI__builtin_neon_vcgtz_v:
8264 case NEON::BI__builtin_neon_vcgtzq_v:
8265 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8266 ICmpInst::ICMP_SGT, "vcgtz");
8267 case NEON::BI__builtin_neon_vcltz_v:
8268 case NEON::BI__builtin_neon_vcltzq_v:
8269 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8270 ICmpInst::ICMP_SLT, "vcltz");
8271 case NEON::BI__builtin_neon_vclz_v:
8272 case NEON::BI__builtin_neon_vclzq_v:
8273 // We generate a target-independent intrinsic, which needs a second argument
8274 // saying whether clz of zero is undefined; on ARM it isn't.
8275 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8276 break;
8277 case NEON::BI__builtin_neon_vcvt_f32_v:
8278 case NEON::BI__builtin_neon_vcvtq_f32_v:
8279 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8280 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8281 HasLegalHalfType);
8282 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8283 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8284 case NEON::BI__builtin_neon_vcvt_f16_s16:
8285 case NEON::BI__builtin_neon_vcvt_f16_u16:
8286 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8287 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8288 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8289 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8290 HasLegalHalfType);
8291 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8292 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8293 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8294 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8295 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8296 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8297 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8298 Function *F = CGM.getIntrinsic(Int, Tys);
8299 return EmitNeonCall(F, Ops, "vcvt_n");
8300 }
8301 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8302 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8303 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8304 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8305 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8306 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8307 Function *F = CGM.getIntrinsic(Int, Tys);
8308 return EmitNeonCall(F, Ops, "vcvt_n");
8309 }
8310 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8311 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8312 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8313 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8314 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8315 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8316 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8317 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8318 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8319 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8320 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8321 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8322 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8323 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8324 return EmitNeonCall(F, Ops, "vcvt_n");
8325 }
8326 case NEON::BI__builtin_neon_vcvt_s32_v:
8327 case NEON::BI__builtin_neon_vcvt_u32_v:
8328 case NEON::BI__builtin_neon_vcvt_s64_v:
8329 case NEON::BI__builtin_neon_vcvt_u64_v:
8330 case NEON::BI__builtin_neon_vcvt_s16_f16:
8331 case NEON::BI__builtin_neon_vcvt_u16_f16:
8332 case NEON::BI__builtin_neon_vcvtq_s32_v:
8333 case NEON::BI__builtin_neon_vcvtq_u32_v:
8334 case NEON::BI__builtin_neon_vcvtq_s64_v:
8335 case NEON::BI__builtin_neon_vcvtq_u64_v:
8336 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8337 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8338 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8339 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8340 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8341 }
8342 case NEON::BI__builtin_neon_vcvta_s16_f16:
8343 case NEON::BI__builtin_neon_vcvta_s32_v:
8344 case NEON::BI__builtin_neon_vcvta_s64_v:
8345 case NEON::BI__builtin_neon_vcvta_u16_f16:
8346 case NEON::BI__builtin_neon_vcvta_u32_v:
8347 case NEON::BI__builtin_neon_vcvta_u64_v:
8348 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8349 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8350 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8351 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8352 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8353 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8354 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8355 case NEON::BI__builtin_neon_vcvtn_s32_v:
8356 case NEON::BI__builtin_neon_vcvtn_s64_v:
8357 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8358 case NEON::BI__builtin_neon_vcvtn_u32_v:
8359 case NEON::BI__builtin_neon_vcvtn_u64_v:
8360 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8361 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8362 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8363 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8364 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8365 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8366 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8367 case NEON::BI__builtin_neon_vcvtp_s32_v:
8368 case NEON::BI__builtin_neon_vcvtp_s64_v:
8369 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8370 case NEON::BI__builtin_neon_vcvtp_u32_v:
8371 case NEON::BI__builtin_neon_vcvtp_u64_v:
8372 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8373 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8374 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8375 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8376 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8377 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8378 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8379 case NEON::BI__builtin_neon_vcvtm_s32_v:
8380 case NEON::BI__builtin_neon_vcvtm_s64_v:
8381 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8382 case NEON::BI__builtin_neon_vcvtm_u32_v:
8383 case NEON::BI__builtin_neon_vcvtm_u64_v:
8384 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8385 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8386 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8387 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8388 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8389 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8390 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8391 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8392 }
8393 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8394 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8395 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8396
8397 }
8398 case NEON::BI__builtin_neon_vext_v:
8399 case NEON::BI__builtin_neon_vextq_v: {
8400 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8401 SmallVector<int, 16> Indices;
8402 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8403 Indices.push_back(i+CV);
8404
8405 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8406 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8407 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8408 }
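// Illustrative example: vext_s8(a, b, 3) on <8 x i8> operands becomes a single
// shufflevector over the concatenated pair, selecting lanes 3..10:
//   %vext = shufflevector <8 x i8> %a, <8 x i8> %b,
//       <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>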
8409 case NEON::BI__builtin_neon_vfma_v:
8410 case NEON::BI__builtin_neon_vfmaq_v: {
8411 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8412 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8413 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8414
8415 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8416 return emitCallMaybeConstrainedFPBuiltin(
8417 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8418 {Ops[1], Ops[2], Ops[0]});
8419 }
8420 case NEON::BI__builtin_neon_vld1_v:
8421 case NEON::BI__builtin_neon_vld1q_v: {
8422 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8423 Ops.push_back(getAlignmentValue32(PtrOp0));
8424 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8425 }
8426 case NEON::BI__builtin_neon_vld1_x2_v:
8427 case NEON::BI__builtin_neon_vld1q_x2_v:
8428 case NEON::BI__builtin_neon_vld1_x3_v:
8429 case NEON::BI__builtin_neon_vld1q_x3_v:
8430 case NEON::BI__builtin_neon_vld1_x4_v:
8431 case NEON::BI__builtin_neon_vld1q_x4_v: {
8432 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8433 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8434 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8435 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8436 }
8437 case NEON::BI__builtin_neon_vld2_v:
8438 case NEON::BI__builtin_neon_vld2q_v:
8439 case NEON::BI__builtin_neon_vld3_v:
8440 case NEON::BI__builtin_neon_vld3q_v:
8441 case NEON::BI__builtin_neon_vld4_v:
8442 case NEON::BI__builtin_neon_vld4q_v:
8443 case NEON::BI__builtin_neon_vld2_dup_v:
8444 case NEON::BI__builtin_neon_vld2q_dup_v:
8445 case NEON::BI__builtin_neon_vld3_dup_v:
8446 case NEON::BI__builtin_neon_vld3q_dup_v:
8447 case NEON::BI__builtin_neon_vld4_dup_v:
8448 case NEON::BI__builtin_neon_vld4q_dup_v: {
8449 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8450 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8451 Value *Align = getAlignmentValue32(PtrOp1);
8452 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8453 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8454 }
8455 case NEON::BI__builtin_neon_vld1_dup_v:
8456 case NEON::BI__builtin_neon_vld1q_dup_v: {
8457 Value *V = PoisonValue::get(Ty);
8458 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8459 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8460 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8461 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8462 return EmitNeonSplat(Ops[0], CI);
8463 }
8464 case NEON::BI__builtin_neon_vld2_lane_v:
8465 case NEON::BI__builtin_neon_vld2q_lane_v:
8466 case NEON::BI__builtin_neon_vld3_lane_v:
8467 case NEON::BI__builtin_neon_vld3q_lane_v:
8468 case NEON::BI__builtin_neon_vld4_lane_v:
8469 case NEON::BI__builtin_neon_vld4q_lane_v: {
8470 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8471 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8472 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8473 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8474 Ops.push_back(getAlignmentValue32(PtrOp1));
8475 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8476 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8477 }
8478 case NEON::BI__builtin_neon_vmovl_v: {
8479 llvm::FixedVectorType *DTy =
8480 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8481 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8482 if (Usgn)
8483 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8484 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8485 }
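// Illustrative example: vmovl_s16 bitcasts its operand to <4 x i16> and widens
// it, roughly "%r = sext <4 x i16> %v to <4 x i32>"; the unsigned variants use
// zext instead.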
8486 case NEON::BI__builtin_neon_vmovn_v: {
8487 llvm::FixedVectorType *QTy =
8488 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8489 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8490 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8491 }
8492 case NEON::BI__builtin_neon_vmull_v:
8493 // FIXME: the integer vmull operations could be emitted in terms of pure
8494 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8495 // hoisting the exts outside loops. Until global ISel comes along that can
8496 // see through such movement, this leads to bad CodeGen. So we need an
8497 // intrinsic for now.
8498 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8499 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8500 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8501 case NEON::BI__builtin_neon_vpadal_v:
8502 case NEON::BI__builtin_neon_vpadalq_v: {
8503 // The source operand type has twice as many elements of half the size.
8504 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8505 llvm::Type *EltTy =
8506 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8507 auto *NarrowTy =
8508 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8509 llvm::Type *Tys[2] = { Ty, NarrowTy };
8510 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8511 }
8512 case NEON::BI__builtin_neon_vpaddl_v:
8513 case NEON::BI__builtin_neon_vpaddlq_v: {
8514 // The source operand type has twice as many elements of half the size.
8515 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8516 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8517 auto *NarrowTy =
8518 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8519 llvm::Type *Tys[2] = { Ty, NarrowTy };
8520 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8521 }
8522 case NEON::BI__builtin_neon_vqdmlal_v:
8523 case NEON::BI__builtin_neon_vqdmlsl_v: {
8524 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8525 Ops[1] =
8526 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8527 Ops.resize(2);
8528 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8529 }
8530 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8531 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8532 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8533 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8534 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8535 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8536 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8537 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8538 RTy->getNumElements() * 2);
8539 llvm::Type *Tys[2] = {
8540 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8541 /*isQuad*/ false))};
8542 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8543 }
8544 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8545 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8546 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8547 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8548 llvm::Type *Tys[2] = {
8549 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8550 /*isQuad*/ true))};
8551 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8552 }
8553 case NEON::BI__builtin_neon_vqshl_n_v:
8554 case NEON::BI__builtin_neon_vqshlq_n_v:
8555 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8556 1, false);
8557 case NEON::BI__builtin_neon_vqshlu_n_v:
8558 case NEON::BI__builtin_neon_vqshluq_n_v:
8559 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8560 1, false);
8561 case NEON::BI__builtin_neon_vrecpe_v:
8562 case NEON::BI__builtin_neon_vrecpeq_v:
8563 case NEON::BI__builtin_neon_vrsqrte_v:
8564 case NEON::BI__builtin_neon_vrsqrteq_v:
8565 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8566 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8567 case NEON::BI__builtin_neon_vrndi_v:
8568 case NEON::BI__builtin_neon_vrndiq_v:
8569 Int = Builder.getIsFPConstrained()
8570 ? Intrinsic::experimental_constrained_nearbyint
8571 : Intrinsic::nearbyint;
8572 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8573 case NEON::BI__builtin_neon_vrshr_n_v:
8574 case NEON::BI__builtin_neon_vrshrq_n_v:
8575 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8576 1, true);
8577 case NEON::BI__builtin_neon_vsha512hq_u64:
8578 case NEON::BI__builtin_neon_vsha512h2q_u64:
8579 case NEON::BI__builtin_neon_vsha512su0q_u64:
8580 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8581 Function *F = CGM.getIntrinsic(Int);
8582 return EmitNeonCall(F, Ops, "");
8583 }
8584 case NEON::BI__builtin_neon_vshl_n_v:
8585 case NEON::BI__builtin_neon_vshlq_n_v:
8586 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8587 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8588 "vshl_n");
8589 case NEON::BI__builtin_neon_vshll_n_v: {
8590 llvm::FixedVectorType *SrcTy =
8591 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8592 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8593 if (Usgn)
8594 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8595 else
8596 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8597 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8598 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8599 }
8600 case NEON::BI__builtin_neon_vshrn_n_v: {
8601 llvm::FixedVectorType *SrcTy =
8602 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8603 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8604 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8605 if (Usgn)
8606 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8607 else
8608 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8609 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8610 }
8611 case NEON::BI__builtin_neon_vshr_n_v:
8612 case NEON::BI__builtin_neon_vshrq_n_v:
8613 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8614 case NEON::BI__builtin_neon_vst1_v:
8615 case NEON::BI__builtin_neon_vst1q_v:
8616 case NEON::BI__builtin_neon_vst2_v:
8617 case NEON::BI__builtin_neon_vst2q_v:
8618 case NEON::BI__builtin_neon_vst3_v:
8619 case NEON::BI__builtin_neon_vst3q_v:
8620 case NEON::BI__builtin_neon_vst4_v:
8621 case NEON::BI__builtin_neon_vst4q_v:
8622 case NEON::BI__builtin_neon_vst2_lane_v:
8623 case NEON::BI__builtin_neon_vst2q_lane_v:
8624 case NEON::BI__builtin_neon_vst3_lane_v:
8625 case NEON::BI__builtin_neon_vst3q_lane_v:
8626 case NEON::BI__builtin_neon_vst4_lane_v:
8627 case NEON::BI__builtin_neon_vst4q_lane_v: {
8628 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8629 Ops.push_back(getAlignmentValue32(PtrOp0));
8630 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8631 }
8632 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8633 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8634 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8635 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8636 case NEON::BI__builtin_neon_vsm4eq_u32: {
8637 Function *F = CGM.getIntrinsic(Int);
8638 return EmitNeonCall(F, Ops, "");
8639 }
8640 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8641 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8642 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8643 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8644 Function *F = CGM.getIntrinsic(Int);
8645 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8646 return EmitNeonCall(F, Ops, "");
8647 }
8648 case NEON::BI__builtin_neon_vst1_x2_v:
8649 case NEON::BI__builtin_neon_vst1q_x2_v:
8650 case NEON::BI__builtin_neon_vst1_x3_v:
8651 case NEON::BI__builtin_neon_vst1q_x3_v:
8652 case NEON::BI__builtin_neon_vst1_x4_v:
8653 case NEON::BI__builtin_neon_vst1q_x4_v: {
8654 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8655 // in AArch64 it comes last. We may want to settle on one or the other.
8656 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8657 Arch == llvm::Triple::aarch64_32) {
8658 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8659 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8660 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8661 }
8662 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8663 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8664 }
8665 case NEON::BI__builtin_neon_vsubhn_v: {
8666 llvm::FixedVectorType *SrcTy =
8667 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8668
8669 // %diff = sub <4 x i32> %lhs, %rhs
8670 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8671 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8672 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8673
8674 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8675 Constant *ShiftAmt =
8676 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8677 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8678
8679 // %res = trunc <4 x i32> %high to <4 x i16>
8680 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8681 }
8682 case NEON::BI__builtin_neon_vtrn_v:
8683 case NEON::BI__builtin_neon_vtrnq_v: {
8684 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8685 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8686 Value *SV = nullptr;
8687
8688 for (unsigned vi = 0; vi != 2; ++vi) {
8689 SmallVector<int, 16> Indices;
8690 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8691 Indices.push_back(i+vi);
8692 Indices.push_back(i+e+vi);
8693 }
8694 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8695 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8696 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8697 }
8698 return SV;
8699 }
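// Illustrative example: for <4 x i16> operands the two stored vtrn results use
// the shuffle masks <0, 4, 2, 6> and <1, 5, 3, 7>, i.e. the even lanes of both
// inputs interleaved, then the odd lanes.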
8700 case NEON::BI__builtin_neon_vtst_v:
8701 case NEON::BI__builtin_neon_vtstq_v: {
8702 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8704 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8705 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8706 ConstantAggregateZero::get(Ty));
8707 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8708 }
8709 case NEON::BI__builtin_neon_vuzp_v:
8710 case NEON::BI__builtin_neon_vuzpq_v: {
8711 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8712 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8713 Value *SV = nullptr;
8714
8715 for (unsigned vi = 0; vi != 2; ++vi) {
8716 SmallVector<int, 16> Indices;
8717 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8718 Indices.push_back(2*i+vi);
8719
8720 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8721 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8722 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8723 }
8724 return SV;
8725 }
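// Illustrative example: for <4 x i16> operands the two stored vuzp results use
// the shuffle masks <0, 2, 4, 6> and <1, 3, 5, 7>, i.e. the even-indexed lanes
// of the concatenated inputs, then the odd-indexed lanes.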
8726 case NEON::BI__builtin_neon_vxarq_u64: {
8727 Function *F = CGM.getIntrinsic(Int);
8728 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8729 return EmitNeonCall(F, Ops, "");
8730 }
8731 case NEON::BI__builtin_neon_vzip_v:
8732 case NEON::BI__builtin_neon_vzipq_v: {
8733 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8734 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8735 Value *SV = nullptr;
8736
8737 for (unsigned vi = 0; vi != 2; ++vi) {
8738 SmallVector<int, 16> Indices;
8739 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8740 Indices.push_back((i + vi*e) >> 1);
8741 Indices.push_back(((i + vi*e) >> 1)+e);
8742 }
8743 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8744 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8745 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8746 }
8747 return SV;
8748 }
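// Illustrative example: for <4 x i16> operands the two stored vzip results use
// the shuffle masks <0, 4, 1, 5> and <2, 6, 3, 7>, i.e. the low halves of the
// two inputs interleaved, then the high halves.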
8749 case NEON::BI__builtin_neon_vdot_s32:
8750 case NEON::BI__builtin_neon_vdot_u32:
8751 case NEON::BI__builtin_neon_vdotq_s32:
8752 case NEON::BI__builtin_neon_vdotq_u32: {
8753 auto *InputTy =
8754 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8755 llvm::Type *Tys[2] = { Ty, InputTy };
8756 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8757 }
8758 case NEON::BI__builtin_neon_vfmlal_low_f16:
8759 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8760 auto *InputTy =
8761 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8762 llvm::Type *Tys[2] = { Ty, InputTy };
8763 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8764 }
8765 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8766 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8767 auto *InputTy =
8768 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8769 llvm::Type *Tys[2] = { Ty, InputTy };
8770 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8771 }
8772 case NEON::BI__builtin_neon_vfmlal_high_f16:
8773 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8774 auto *InputTy =
8775 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8776 llvm::Type *Tys[2] = { Ty, InputTy };
8777 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8778 }
8779 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8780 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8781 auto *InputTy =
8782 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8783 llvm::Type *Tys[2] = { Ty, InputTy };
8784 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8785 }
8786 case NEON::BI__builtin_neon_vmmlaq_s32:
8787 case NEON::BI__builtin_neon_vmmlaq_u32: {
8788 auto *InputTy =
8789 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8790 llvm::Type *Tys[2] = { Ty, InputTy };
8791 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8792 }
8793 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8794 auto *InputTy =
8795 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8796 llvm::Type *Tys[2] = { Ty, InputTy };
8797 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8798 }
8799 case NEON::BI__builtin_neon_vusdot_s32:
8800 case NEON::BI__builtin_neon_vusdotq_s32: {
8801 auto *InputTy =
8802 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8803 llvm::Type *Tys[2] = { Ty, InputTy };
8804 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8805 }
8806 case NEON::BI__builtin_neon_vbfdot_f32:
8807 case NEON::BI__builtin_neon_vbfdotq_f32: {
8808 llvm::Type *InputTy =
8809 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8810 llvm::Type *Tys[2] = { Ty, InputTy };
8811 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8812 }
8813 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8814 llvm::Type *Tys[1] = { Ty };
8815 Function *F = CGM.getIntrinsic(Int, Tys);
8816 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8817 }
8818
8819 }
8820
8821 assert(Int && "Expected valid intrinsic number");
8822
8823 // Determine the type(s) of this overloaded AArch64 intrinsic.
8824 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8825
8826 Value *Result = EmitNeonCall(F, Ops, NameHint);
8827 llvm::Type *ResultType = ConvertType(E->getType());
8828 // Cast the AArch64 intrinsic's one-element vector result back to the
8829 // scalar type expected by the builtin.
8830 return Builder.CreateBitCast(Result, ResultType, NameHint);
8831}
8832
8833Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8834 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8835 const CmpInst::Predicate Ip, const Twine &Name) {
8836 llvm::Type *OTy = Op->getType();
8837
8838 // FIXME: this is utterly horrific. We should not be looking at previous
8839 // codegen context to find out what needs doing. Unfortunately TableGen
8840 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8841 // (etc).
8842 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8843 OTy = BI->getOperand(0)->getType();
8844
8845 Op = Builder.CreateBitCast(Op, OTy);
8846 if (OTy->getScalarType()->isFloatingPointTy()) {
8847 if (Fp == CmpInst::FCMP_OEQ)
8848 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8849 else
8850 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8851 } else {
8852 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8853 }
8854 return Builder.CreateSExt(Op, Ty, Name);
8855}
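// Illustrative example: vceqz_f32 on a <2 x float> operand is emitted roughly
// as:
//   %cmp = fcmp oeq <2 x float> %v, zeroinitializer
//   %res = sext <2 x i1> %cmp to <2 x i32>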
8856
8857static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8858 Value *ExtOp, Value *IndexOp,
8859 llvm::Type *ResTy, unsigned IntID,
8860 const char *Name) {
8861 SmallVector<Value *, 2> TblOps;
8862 if (ExtOp)
8863 TblOps.push_back(ExtOp);
8864
8865 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8866 SmallVector<int, 16> Indices;
8867 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8868 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8869 Indices.push_back(2*i);
8870 Indices.push_back(2*i+1);
8871 }
8872
8873 int PairPos = 0, End = Ops.size() - 1;
8874 while (PairPos < End) {
8875 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8876 Ops[PairPos+1], Indices,
8877 Name));
8878 PairPos += 2;
8879 }
8880
8881 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8882 // of the last 128-bit lookup table with zero.
8883 if (PairPos == End) {
8884 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8885 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8886 ZeroTbl, Indices, Name));
8887 }
8888
8889 Function *TblF;
8890 TblOps.push_back(IndexOp);
8891 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8892
8893 return CGF.EmitNeonCall(TblF, TblOps, Name);
8894}
8895
8896Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8897 unsigned Value;
8898 switch (BuiltinID) {
8899 default:
8900 return nullptr;
8901 case clang::ARM::BI__builtin_arm_nop:
8902 Value = 0;
8903 break;
8904 case clang::ARM::BI__builtin_arm_yield:
8905 case clang::ARM::BI__yield:
8906 Value = 1;
8907 break;
8908 case clang::ARM::BI__builtin_arm_wfe:
8909 case clang::ARM::BI__wfe:
8910 Value = 2;
8911 break;
8912 case clang::ARM::BI__builtin_arm_wfi:
8913 case clang::ARM::BI__wfi:
8914 Value = 3;
8915 break;
8916 case clang::ARM::BI__builtin_arm_sev:
8917 case clang::ARM::BI__sev:
8918 Value = 4;
8919 break;
8920 case clang::ARM::BI__builtin_arm_sevl:
8921 case clang::ARM::BI__sevl:
8922 Value = 5;
8923 break;
8924 }
8925
8926 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8927 llvm::ConstantInt::get(Int32Ty, Value));
8928}
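// Illustrative example: __builtin_arm_wfi() maps to hint value 3 above and is
// emitted as "call void @llvm.arm.hint(i32 3)".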
8929
8930enum SpecialRegisterAccessKind {
8931 NormalRead,
8932 VolatileRead,
8933 Write,
8934};
8935
8936// Generates the IR for __builtin_read_exec_*.
8937// Lowers the builtin to the amdgcn_ballot intrinsic.
8938static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8939 llvm::Type *RegisterType,
8940 llvm::Type *ValueType, bool isExecHi) {
8941 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8942 CodeGen::CodeGenModule &CGM = CGF.CGM;
8943
8944 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8945 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8946
8947 if (isExecHi) {
8948 Value *Rt2 = Builder.CreateLShr(Call, 32);
8949 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8950 return Rt2;
8951 }
8952
8953 return Call;
8954}
8955
8956// Generates the IR for the read/write special register builtin.
8957// ValueType is the type of the value that is to be written or read;
8958// RegisterType is the type of the register being written to or read from.
8959static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8960 const CallExpr *E,
8961 llvm::Type *RegisterType,
8962 llvm::Type *ValueType,
8963 SpecialRegisterAccessKind AccessKind,
8964 StringRef SysReg = "") {
8965 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8966 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8967 RegisterType->isIntegerTy(128)) &&
8968 "Unsupported size for register.");
8969
8970 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8971 CodeGen::CodeGenModule &CGM = CGF.CGM;
8972 LLVMContext &Context = CGM.getLLVMContext();
8973
8974 if (SysReg.empty()) {
8975 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8976 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8977 }
8978
8979 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8980 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8981 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8982
8983 llvm::Type *Types[] = { RegisterType };
8984
8985 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8986 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8987 && "Can't fit 64-bit value in 32-bit register");
8988
8989 if (AccessKind != Write) {
8990 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8991 llvm::Function *F = CGM.getIntrinsic(
8992 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8993 : llvm::Intrinsic::read_register,
8994 Types);
8995 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8996
8997 if (MixedTypes)
8998 // Read into a 64-bit register, then truncate the result to 32 bits.
8999 return Builder.CreateTrunc(Call, ValueType);
9000
9001 if (ValueType->isPointerTy())
9002 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
9003 return Builder.CreateIntToPtr(Call, ValueType);
9004
9005 return Call;
9006 }
9007
9008 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
9009 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
9010 if (MixedTypes) {
9011 // Extend 32 bit write value to 64 bit to pass to write.
9012 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
9013 return Builder.CreateCall(F, { Metadata, ArgValue });
9014 }
9015
9016 if (ValueType->isPointerTy()) {
9017 // Have VoidPtrTy ArgValue but want to return an i32/i64.
9018 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
9019 return Builder.CreateCall(F, { Metadata, ArgValue });
9020 }
9021
9022 return Builder.CreateCall(F, { Metadata, ArgValue });
9023}
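// Illustrative example (register name assumed): a 32-bit read such as
// __builtin_arm_rsr("cpsr") becomes a call to @llvm.read_volatile_register.i32
// with the register name attached as metadata; the write builtins go through
// @llvm.write_register with the value as a second operand.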
9024
9025/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
9026/// argument that specifies the vector type.
9027static bool HasExtraNeonArgument(unsigned BuiltinID) {
9028 switch (BuiltinID) {
9029 default: break;
9030 case NEON::BI__builtin_neon_vget_lane_i8:
9031 case NEON::BI__builtin_neon_vget_lane_i16:
9032 case NEON::BI__builtin_neon_vget_lane_bf16:
9033 case NEON::BI__builtin_neon_vget_lane_i32:
9034 case NEON::BI__builtin_neon_vget_lane_i64:
9035 case NEON::BI__builtin_neon_vget_lane_f32:
9036 case NEON::BI__builtin_neon_vgetq_lane_i8:
9037 case NEON::BI__builtin_neon_vgetq_lane_i16:
9038 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9039 case NEON::BI__builtin_neon_vgetq_lane_i32:
9040 case NEON::BI__builtin_neon_vgetq_lane_i64:
9041 case NEON::BI__builtin_neon_vgetq_lane_f32:
9042 case NEON::BI__builtin_neon_vduph_lane_bf16:
9043 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9044 case NEON::BI__builtin_neon_vset_lane_i8:
9045 case NEON::BI__builtin_neon_vset_lane_i16:
9046 case NEON::BI__builtin_neon_vset_lane_bf16:
9047 case NEON::BI__builtin_neon_vset_lane_i32:
9048 case NEON::BI__builtin_neon_vset_lane_i64:
9049 case NEON::BI__builtin_neon_vset_lane_f32:
9050 case NEON::BI__builtin_neon_vsetq_lane_i8:
9051 case NEON::BI__builtin_neon_vsetq_lane_i16:
9052 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9053 case NEON::BI__builtin_neon_vsetq_lane_i32:
9054 case NEON::BI__builtin_neon_vsetq_lane_i64:
9055 case NEON::BI__builtin_neon_vsetq_lane_f32:
9056 case NEON::BI__builtin_neon_vsha1h_u32:
9057 case NEON::BI__builtin_neon_vsha1cq_u32:
9058 case NEON::BI__builtin_neon_vsha1pq_u32:
9059 case NEON::BI__builtin_neon_vsha1mq_u32:
9060 case NEON::BI__builtin_neon_vcvth_bf16_f32:
9061 case clang::ARM::BI_MoveToCoprocessor:
9062 case clang::ARM::BI_MoveToCoprocessor2:
9063 return false;
9064 }
9065 return true;
9066}
9067
9068Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
9069 const CallExpr *E,
9070 ReturnValueSlot ReturnValue,
9071 llvm::Triple::ArchType Arch) {
9072 if (auto Hint = GetValueForARMHint(BuiltinID))
9073 return Hint;
9074
9075 if (BuiltinID == clang::ARM::BI__emit) {
9076 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
9077 llvm::FunctionType *FTy =
9078 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
9079
9080 Expr::EvalResult Result;
9081 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9082 llvm_unreachable("Sema will ensure that the parameter is constant");
9083
9084 llvm::APSInt Value = Result.Val.getInt();
9085 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
9086
9087 llvm::InlineAsm *Emit =
9088 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
9089 /*hasSideEffects=*/true)
9090 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
9091 /*hasSideEffects=*/true);
9092
9093 return Builder.CreateCall(Emit);
9094 }
9095
9096 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
9097 Value *Option = EmitScalarExpr(E->getArg(0));
9098 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
9099 }
9100
9101 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
9102 Value *Address = EmitScalarExpr(E->getArg(0));
9103 Value *RW = EmitScalarExpr(E->getArg(1));
9104 Value *IsData = EmitScalarExpr(E->getArg(2));
9105
9106 // Locality is not supported on ARM target
9107 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
9108
9109 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9110 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9111 }
9112
9113 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
9114 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9115 return Builder.CreateCall(
9116 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9117 }
9118
9119 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
9120 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
9121 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9122 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
9123 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
9124 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
9125 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
9126 return Res;
9127 }
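// Illustrative example: __builtin_arm_clz(x) is emitted as
// "call i32 @llvm.ctlz.i32(i32 %x, i1 false)", i.e. with a defined result of
// 32 for a zero input; the 64-bit variant additionally truncates to i32.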
9128
9129
9130 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
9131 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9132 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
9133 }
9134 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9135 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9136 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
9137 "cls");
9138 }
9139
9140 if (BuiltinID == clang::ARM::BI__clear_cache) {
9141 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9142 const FunctionDecl *FD = E->getDirectCallee();
9143 Value *Ops[2];
9144 for (unsigned i = 0; i < 2; i++)
9145 Ops[i] = EmitScalarExpr(E->getArg(i));
9146 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9147 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9148 StringRef Name = FD->getName();
9149 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9150 }
9151
9152 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9153 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9154 Function *F;
9155
9156 switch (BuiltinID) {
9157 default: llvm_unreachable("unexpected builtin");
9158 case clang::ARM::BI__builtin_arm_mcrr:
9159 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9160 break;
9161 case clang::ARM::BI__builtin_arm_mcrr2:
9162 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9163 break;
9164 }
9165
9166 // The MCRR{2} instruction has 5 operands, but
9167 // the intrinsic has only 4 because Rt and Rt2
9168 // are represented as a single unsigned 64-bit
9169 // integer in the intrinsic definition, while
9170 // internally they are represented as two
9171 // 32-bit integers.
9172
9173 Value *Coproc = EmitScalarExpr(E->getArg(0));
9174 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9175 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9176 Value *CRm = EmitScalarExpr(E->getArg(3));
9177
9178 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9179 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9180 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9181 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9182
9183 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9184 }
9185
9186 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9187 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9188 Function *F;
9189
9190 switch (BuiltinID) {
9191 default: llvm_unreachable("unexpected builtin");
9192 case clang::ARM::BI__builtin_arm_mrrc:
9193 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9194 break;
9195 case clang::ARM::BI__builtin_arm_mrrc2:
9196 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9197 break;
9198 }
9199
9200 Value *Coproc = EmitScalarExpr(E->getArg(0));
9201 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9202 Value *CRm = EmitScalarExpr(E->getArg(2));
9203 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9204
9205 // Returns an unsigned 64 bit integer, represented
9206 // as two 32 bit integers.
9207
9208 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9209 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9210 Rt = Builder.CreateZExt(Rt, Int64Ty);
9211 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9212
9213 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9214 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9215 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9216
9217 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9218 }
9219
9220 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9221 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9222 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9223 getContext().getTypeSize(E->getType()) == 64) ||
9224 BuiltinID == clang::ARM::BI__ldrexd) {
9225 Function *F;
9226
9227 switch (BuiltinID) {
9228 default: llvm_unreachable("unexpected builtin");
9229 case clang::ARM::BI__builtin_arm_ldaex:
9230 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9231 break;
9232 case clang::ARM::BI__builtin_arm_ldrexd:
9233 case clang::ARM::BI__builtin_arm_ldrex:
9234 case clang::ARM::BI__ldrexd:
9235 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9236 break;
9237 }
9238
9239 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9240 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9241
9242 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9243 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9244 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9245 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9246
9247 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9248 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9249 Val = Builder.CreateOr(Val, Val1);
9250 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9251 }
9252
9253 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9254 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9255 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9256
9257 QualType Ty = E->getType();
9258 llvm::Type *RealResTy = ConvertType(Ty);
9259 llvm::Type *IntTy =
9260 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9261 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9262 Function *F = CGM.getIntrinsic(
9263 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9264 : Intrinsic::arm_ldrex,
9265 UnqualPtrTy);
9266 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9267 Val->addParamAttr(
9268 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9269
9270 if (RealResTy->isPointerTy())
9271 return Builder.CreateIntToPtr(Val, RealResTy);
9272 else {
9273 llvm::Type *IntResTy = llvm::IntegerType::get(
9274 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9275 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9276 RealResTy);
9277 }
9278 }
9279
9280 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9281 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9282 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9283 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9284 Function *F = CGM.getIntrinsic(
9285 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9286 : Intrinsic::arm_strexd);
9287 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9288
9289 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9290 Value *Val = EmitScalarExpr(E->getArg(0));
9291 Builder.CreateStore(Val, Tmp);
9292
9293 Address LdPtr = Tmp.withElementType(STy);
9294 Val = Builder.CreateLoad(LdPtr);
9295
9296 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9297 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9298 Value *StPtr = EmitScalarExpr(E->getArg(1));
9299 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9300 }
9301
9302 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9303 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9304 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9305 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9306
9307 QualType Ty = E->getArg(0)->getType();
9308 llvm::Type *StoreTy =
9309 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9310
9311 if (StoreVal->getType()->isPointerTy())
9312 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9313 else {
9314 llvm::Type *IntTy = llvm::IntegerType::get(
9315 getLLVMContext(),
9316 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9317 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9318 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9319 }
9320
9321 Function *F = CGM.getIntrinsic(
9322 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9323 : Intrinsic::arm_strex,
9324 StoreAddr->getType());
9325
9326 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9327 CI->addParamAttr(
9328 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9329 return CI;
9330 }
9331
9332 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9333 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9334 return Builder.CreateCall(F);
9335 }
9336
9337 // CRC32
9338 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9339 switch (BuiltinID) {
9340 case clang::ARM::BI__builtin_arm_crc32b:
9341 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9342 case clang::ARM::BI__builtin_arm_crc32cb:
9343 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9344 case clang::ARM::BI__builtin_arm_crc32h:
9345 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9346 case clang::ARM::BI__builtin_arm_crc32ch:
9347 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9348 case clang::ARM::BI__builtin_arm_crc32w:
9349 case clang::ARM::BI__builtin_arm_crc32d:
9350 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9351 case clang::ARM::BI__builtin_arm_crc32cw:
9352 case clang::ARM::BI__builtin_arm_crc32cd:
9353 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9354 }
9355
9356 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9357 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9358 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9359
9360 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9361 // intrinsics, hence we need different codegen for these cases.
9362 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9363 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9364 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9365 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9366 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9367 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9368
9369 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9370 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9371 return Builder.CreateCall(F, {Res, Arg1b});
9372 } else {
9373 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9374
9375 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9376 return Builder.CreateCall(F, {Arg0, Arg1});
9377 }
9378 }
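// Illustrative example: __builtin_arm_crc32d(crc, val) is therefore emitted as
// two chained calls, roughly crc32w(crc32w(crc, lo32(val)), hi32(val)).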
9379
9380 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9381 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9382 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9383 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9384 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9385 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9386
9387 SpecialRegisterAccessKind AccessKind = Write;
9388 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9389 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9390 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9391 AccessKind = VolatileRead;
9392
9393 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9394 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9395
9396 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9397 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9398
9399 llvm::Type *ValueType;
9400 llvm::Type *RegisterType;
9401 if (IsPointerBuiltin) {
9402 ValueType = VoidPtrTy;
9403 RegisterType = Int32Ty;
9404 } else if (Is64Bit) {
9405 ValueType = RegisterType = Int64Ty;
9406 } else {
9407 ValueType = RegisterType = Int32Ty;
9408 }
9409
9410 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9411 AccessKind);
9412 }
9413
9414 if (BuiltinID == ARM::BI__builtin_sponentry) {
9415 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9416 return Builder.CreateCall(F);
9417 }
9418
9419 // Handle MSVC intrinsics before argument evaluation to prevent double
9420 // evaluation.
9421 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9422 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9423
9424 // Deal with MVE builtins
9425 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9426 return Result;
9427 // Handle CDE builtins
9428 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9429 return Result;
9430
9431 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
9432 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9433 return P.first == BuiltinID;
9434 });
9435 if (It != end(NEONEquivalentIntrinsicMap))
9436 BuiltinID = It->second;
9437
9438 // Find out if any arguments are required to be integer constant
9439 // expressions.
9440 unsigned ICEArguments = 0;
9441 ASTContext::GetBuiltinTypeError Error;
9442 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9443 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9444
9445 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9446 return Builder.getInt32(addr.getAlignment().getQuantity());
9447 };
9448
9449 Address PtrOp0 = Address::invalid();
9450 Address PtrOp1 = Address::invalid();
9451 SmallVector<Value *, 4> Ops;
9452 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9453 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9454 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9455 if (i == 0) {
9456 switch (BuiltinID) {
9457 case NEON::BI__builtin_neon_vld1_v:
9458 case NEON::BI__builtin_neon_vld1q_v:
9459 case NEON::BI__builtin_neon_vld1q_lane_v:
9460 case NEON::BI__builtin_neon_vld1_lane_v:
9461 case NEON::BI__builtin_neon_vld1_dup_v:
9462 case NEON::BI__builtin_neon_vld1q_dup_v:
9463 case NEON::BI__builtin_neon_vst1_v:
9464 case NEON::BI__builtin_neon_vst1q_v:
9465 case NEON::BI__builtin_neon_vst1q_lane_v:
9466 case NEON::BI__builtin_neon_vst1_lane_v:
9467 case NEON::BI__builtin_neon_vst2_v:
9468 case NEON::BI__builtin_neon_vst2q_v:
9469 case NEON::BI__builtin_neon_vst2_lane_v:
9470 case NEON::BI__builtin_neon_vst2q_lane_v:
9471 case NEON::BI__builtin_neon_vst3_v:
9472 case NEON::BI__builtin_neon_vst3q_v:
9473 case NEON::BI__builtin_neon_vst3_lane_v:
9474 case NEON::BI__builtin_neon_vst3q_lane_v:
9475 case NEON::BI__builtin_neon_vst4_v:
9476 case NEON::BI__builtin_neon_vst4q_v:
9477 case NEON::BI__builtin_neon_vst4_lane_v:
9478 case NEON::BI__builtin_neon_vst4q_lane_v:
9479 // Get the alignment for the argument in addition to the value;
9480 // we'll use it later.
9481 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9482 Ops.push_back(PtrOp0.emitRawPointer(*this));
9483 continue;
9484 }
9485 }
9486 if (i == 1) {
9487 switch (BuiltinID) {
9488 case NEON::BI__builtin_neon_vld2_v:
9489 case NEON::BI__builtin_neon_vld2q_v:
9490 case NEON::BI__builtin_neon_vld3_v:
9491 case NEON::BI__builtin_neon_vld3q_v:
9492 case NEON::BI__builtin_neon_vld4_v:
9493 case NEON::BI__builtin_neon_vld4q_v:
9494 case NEON::BI__builtin_neon_vld2_lane_v:
9495 case NEON::BI__builtin_neon_vld2q_lane_v:
9496 case NEON::BI__builtin_neon_vld3_lane_v:
9497 case NEON::BI__builtin_neon_vld3q_lane_v:
9498 case NEON::BI__builtin_neon_vld4_lane_v:
9499 case NEON::BI__builtin_neon_vld4q_lane_v:
9500 case NEON::BI__builtin_neon_vld2_dup_v:
9501 case NEON::BI__builtin_neon_vld2q_dup_v:
9502 case NEON::BI__builtin_neon_vld3_dup_v:
9503 case NEON::BI__builtin_neon_vld3q_dup_v:
9504 case NEON::BI__builtin_neon_vld4_dup_v:
9505 case NEON::BI__builtin_neon_vld4q_dup_v:
9506 // Get the alignment for the argument in addition to the value;
9507 // we'll use it later.
9508 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9509 Ops.push_back(PtrOp1.emitRawPointer(*this));
9510 continue;
9511 }
9512 }
9513
9514 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9515 }
9516
9517 switch (BuiltinID) {
9518 default: break;
9519
9520 case NEON::BI__builtin_neon_vget_lane_i8:
9521 case NEON::BI__builtin_neon_vget_lane_i16:
9522 case NEON::BI__builtin_neon_vget_lane_i32:
9523 case NEON::BI__builtin_neon_vget_lane_i64:
9524 case NEON::BI__builtin_neon_vget_lane_bf16:
9525 case NEON::BI__builtin_neon_vget_lane_f32:
9526 case NEON::BI__builtin_neon_vgetq_lane_i8:
9527 case NEON::BI__builtin_neon_vgetq_lane_i16:
9528 case NEON::BI__builtin_neon_vgetq_lane_i32:
9529 case NEON::BI__builtin_neon_vgetq_lane_i64:
9530 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9531 case NEON::BI__builtin_neon_vgetq_lane_f32:
9532 case NEON::BI__builtin_neon_vduph_lane_bf16:
9533 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9534 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9535
9536 case NEON::BI__builtin_neon_vrndns_f32: {
9537 Value *Arg = EmitScalarExpr(E->getArg(0));
9538 llvm::Type *Tys[] = {Arg->getType()};
9539 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9540 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9541
9542 case NEON::BI__builtin_neon_vset_lane_i8:
9543 case NEON::BI__builtin_neon_vset_lane_i16:
9544 case NEON::BI__builtin_neon_vset_lane_i32:
9545 case NEON::BI__builtin_neon_vset_lane_i64:
9546 case NEON::BI__builtin_neon_vset_lane_bf16:
9547 case NEON::BI__builtin_neon_vset_lane_f32:
9548 case NEON::BI__builtin_neon_vsetq_lane_i8:
9549 case NEON::BI__builtin_neon_vsetq_lane_i16:
9550 case NEON::BI__builtin_neon_vsetq_lane_i32:
9551 case NEON::BI__builtin_neon_vsetq_lane_i64:
9552 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9553 case NEON::BI__builtin_neon_vsetq_lane_f32:
9554 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9555
9556 case NEON::BI__builtin_neon_vsha1h_u32:
9557 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9558 "vsha1h");
9559 case NEON::BI__builtin_neon_vsha1cq_u32:
9560 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9561 "vsha1h");
9562 case NEON::BI__builtin_neon_vsha1pq_u32:
9563 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9564 "vsha1h");
9565 case NEON::BI__builtin_neon_vsha1mq_u32:
9566 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9567 "vsha1h");
9568
9569 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9570 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9571 "vcvtbfp2bf");
9572 }
9573
9574 // The ARM _MoveToCoprocessor builtins put the input register value as
9575 // the first argument, but the LLVM intrinsic expects it as the third one.
9576 case clang::ARM::BI_MoveToCoprocessor:
9577 case clang::ARM::BI_MoveToCoprocessor2: {
9578 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9579 ? Intrinsic::arm_mcr
9580 : Intrinsic::arm_mcr2);
9581 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9582 Ops[3], Ops[4], Ops[5]});
9583 }
9584 }
9585
9586 // Get the last argument, which specifies the vector type.
9587 assert(HasExtraArg);
9588 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9589 std::optional<llvm::APSInt> Result =
9590 Arg->getIntegerConstantExpr(getContext());
9591 if (!Result)
9592 return nullptr;
9593
9594 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9595 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9596 // Determine the overloaded type of this builtin.
9597 llvm::Type *Ty;
9598 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9599 Ty = FloatTy;
9600 else
9601 Ty = DoubleTy;
9602
9603 // Determine whether this is an unsigned conversion or not.
9604 bool usgn = Result->getZExtValue() == 1;
9605 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9606
9607 // Call the appropriate intrinsic.
9608 Function *F = CGM.getIntrinsic(Int, Ty);
9609 return Builder.CreateCall(F, Ops, "vcvtr");
9610 }
9611
9612 // Determine the type of this overloaded NEON intrinsic.
9613 NeonTypeFlags Type = Result->getZExtValue();
9614 bool usgn = Type.isUnsigned();
9615 bool rightShift = false;
9616
9617 llvm::FixedVectorType *VTy =
9618 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9619 getTarget().hasBFloat16Type());
9620 llvm::Type *Ty = VTy;
9621 if (!Ty)
9622 return nullptr;
9623
9624 // Many NEON builtins have identical semantics and uses in ARM and
9625 // AArch64. Emit these in a single function.
9626 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9627 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9628 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9629 if (Builtin)
9630 return EmitCommonNeonBuiltinExpr(
9631 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9632 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9633
9634 unsigned Int;
9635 switch (BuiltinID) {
9636 default: return nullptr;
9637 case NEON::BI__builtin_neon_vld1q_lane_v:
9638 // Handle 64-bit integer elements as a special case. Use shuffles of
9639 // one-element vectors to avoid poor code for i64 in the backend.
9640 if (VTy->getElementType()->isIntegerTy(64)) {
9641 // Extract the other lane.
9642 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9643 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9644 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9645 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9646 // Load the value as a one-element vector.
9647 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9648 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9649 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9650 Value *Align = getAlignmentValue32(PtrOp0);
9651 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9652 // Combine them.
9653 int Indices[] = {1 - Lane, Lane};
9654 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9655 }
9656 [[fallthrough]];
9657 case NEON::BI__builtin_neon_vld1_lane_v: {
9658 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9659 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9660 Value *Ld = Builder.CreateLoad(PtrOp0);
9661 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9662 }
9663 case NEON::BI__builtin_neon_vqrshrn_n_v:
9664 Int =
9665 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9666 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9667 1, true);
9668 case NEON::BI__builtin_neon_vqrshrun_n_v:
9669 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9670 Ops, "vqrshrun_n", 1, true);
9671 case NEON::BI__builtin_neon_vqshrn_n_v:
9672 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9673 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9674 1, true);
9675 case NEON::BI__builtin_neon_vqshrun_n_v:
9676 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9677 Ops, "vqshrun_n", 1, true);
9678 case NEON::BI__builtin_neon_vrecpe_v:
9679 case NEON::BI__builtin_neon_vrecpeq_v:
9680 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9681 Ops, "vrecpe");
9682 case NEON::BI__builtin_neon_vrshrn_n_v:
9683 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9684 Ops, "vrshrn_n", 1, true);
9685 case NEON::BI__builtin_neon_vrsra_n_v:
9686 case NEON::BI__builtin_neon_vrsraq_n_v:
9687 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9688 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9689 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9690 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9691 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9692 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9693 case NEON::BI__builtin_neon_vsri_n_v:
9694 case NEON::BI__builtin_neon_vsriq_n_v:
9695 rightShift = true;
9696 [[fallthrough]];
9697 case NEON::BI__builtin_neon_vsli_n_v:
9698 case NEON::BI__builtin_neon_vsliq_n_v:
9699 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9700 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9701 Ops, "vsli_n");
9702 case NEON::BI__builtin_neon_vsra_n_v:
9703 case NEON::BI__builtin_neon_vsraq_n_v:
9704 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9705 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9706 return Builder.CreateAdd(Ops[0], Ops[1]);
9707 case NEON::BI__builtin_neon_vst1q_lane_v:
9708 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9709 // a one-element vector and avoid poor code for i64 in the backend.
9710 if (VTy->getElementType()->isIntegerTy(64)) {
9711 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9712 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9713 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9714 Ops[2] = getAlignmentValue32(PtrOp0);
9715 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9716 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9717 Tys), Ops);
9718 }
9719 [[fallthrough]];
9720 case NEON::BI__builtin_neon_vst1_lane_v: {
9721 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9722 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9723 return Builder.CreateStore(Ops[1],
9724 PtrOp0.withElementType(Ops[1]->getType()));
9725 }
9726 case NEON::BI__builtin_neon_vtbl1_v:
9727 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9728 Ops, "vtbl1");
9729 case NEON::BI__builtin_neon_vtbl2_v:
9730 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9731 Ops, "vtbl2");
9732 case NEON::BI__builtin_neon_vtbl3_v:
9733 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9734 Ops, "vtbl3");
9735 case NEON::BI__builtin_neon_vtbl4_v:
9736 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9737 Ops, "vtbl4");
9738 case NEON::BI__builtin_neon_vtbx1_v:
9739 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9740 Ops, "vtbx1");
9741 case NEON::BI__builtin_neon_vtbx2_v:
9742 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9743 Ops, "vtbx2");
9744 case NEON::BI__builtin_neon_vtbx3_v:
9745 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9746 Ops, "vtbx3");
9747 case NEON::BI__builtin_neon_vtbx4_v:
9748 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9749 Ops, "vtbx4");
9750 }
9751}
9752
9753template<typename Integer>
9754 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9755 return E->getIntegerConstantExpr(Context)->getExtValue();
9756}
9757
9758static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9759 llvm::Type *T, bool Unsigned) {
9760 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9761 // which finds it convenient to specify signed/unsigned as a boolean flag.
9762 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9763}
9764
9765static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9766 uint32_t Shift, bool Unsigned) {
9767 // MVE helper function for integer shift right. This must handle signed vs
9768 // unsigned, and also deal specially with the case where the shift count is
9769 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9770 // undefined behavior, but in MVE it's legal, so we must convert it to code
9771 // that is not undefined in IR.
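// As an illustrative sketch of the cases below: a <8 x i16> operand with
// Shift == 16 folds to zeroinitializer when unsigned, and becomes an
// arithmetic shift right by 15 in every lane when signed.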
9772 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9773 ->getElementType()
9774 ->getPrimitiveSizeInBits();
9775 if (Shift == LaneBits) {
9776 // An unsigned shift of the full lane size always generates zero, so we can
9777 // simply emit a zero vector. A signed shift of the full lane size does the
9778 // same thing as shifting by one bit fewer.
9779 if (Unsigned)
9780 return llvm::Constant::getNullValue(V->getType());
9781 else
9782 --Shift;
9783 }
9784 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9785}
9786
9787static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9788 // MVE-specific helper function for a vector splat, which infers the element
9789 // count of the output vector by knowing that MVE vectors are all 128 bits
9790 // wide.
9791 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9792 return Builder.CreateVectorSplat(Elements, V);
9793}
9794
9795static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9796 CodeGenFunction *CGF,
9797 llvm::Value *V,
9798 llvm::Type *DestType) {
9799 // Convert one MVE vector type into another by reinterpreting its in-register
9800 // format.
9801 //
9802 // In little-endian mode, this is identical to a bitcast (which reinterprets
9803 // the memory format). In big-endian mode they are not necessarily the same,
9804 // because the register and memory formats map to each other differently
9805 // depending on the lane size.
9806 //
9807 // We generate a bitcast whenever we can (if we're little-endian, or if the
9808 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9809 // that performs the different kind of reinterpretation.
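// Illustrative sketch: reinterpreting <16 x i8> as <8 x i16> is a plain
// bitcast on little-endian targets, but on big-endian targets (where the
// 8-bit and 16-bit lane sizes differ) it becomes a call to the
// llvm.arm.mve.vreinterpretq intrinsic overloaded on both vector types.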
9810 if (CGF->getTarget().isBigEndian() &&
9811 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9812 return Builder.CreateCall(
9813 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9814 {DestType, V->getType()}),
9815 V);
9816 } else {
9817 return Builder.CreateBitCast(V, DestType);
9818 }
9819}
9820
9821static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9822 // Make a shufflevector that extracts every other element of a vector (evens
9823 // or odds, as desired).
9824 SmallVector<int, 16> Indices;
9825 unsigned InputElements =
9826 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9827 for (unsigned i = 0; i < InputElements; i += 2)
9828 Indices.push_back(i + Odd);
9829 return Builder.CreateShuffleVector(V, Indices);
9830}
9831
9832static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9833 llvm::Value *V1) {
9834 // Make a shufflevector that interleaves two vectors element by element.
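// For example, two <4 x i32> inputs produce the shuffle mask
// <0, 4, 1, 5, 2, 6, 3, 7>, i.e. a full element-by-element interleave.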
9835 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9836 SmallVector<int, 16> Indices;
9837 unsigned InputElements =
9838 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9839 for (unsigned i = 0; i < InputElements; i++) {
9840 Indices.push_back(i);
9841 Indices.push_back(i + InputElements);
9842 }
9843 return Builder.CreateShuffleVector(V0, V1, Indices);
9844}
9845
9846template<unsigned HighBit, unsigned OtherBits>
9847static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9848 // MVE-specific helper function to make a vector splat of a constant such as
9849 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
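// For example, with 16-bit lanes: HighBit=1, OtherBits=0 splats 0x8000
// (INT16_MIN), HighBit=0, OtherBits=1 splats 0x7fff (INT16_MAX), and
// HighBit=1, OtherBits=1 splats 0xffff (UINT16_MAX).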
9850 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9851 unsigned LaneBits = T->getPrimitiveSizeInBits();
9852 uint32_t Value = HighBit << (LaneBits - 1);
9853 if (OtherBits)
9854 Value |= (1UL << (LaneBits - 1)) - 1;
9855 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9856 return ARMMVEVectorSplat(Builder, Lane);
9857}
9858
9859static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9860 llvm::Value *V,
9861 unsigned ReverseWidth) {
9862 // MVE-specific helper function which reverses the elements of a
9863 // vector within every (ReverseWidth)-bit collection of lanes.
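// For example, with 8-bit lanes and ReverseWidth == 32 the mask is i ^ 3,
// i.e. <3, 2, 1, 0, 7, 6, 5, 4, ...>, which reverses the bytes within each
// 32-bit group of lanes (a vrev32.8-style permutation).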
9864 SmallVector<int, 16> Indices;
9865 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9866 unsigned Elements = 128 / LaneSize;
9867 unsigned Mask = ReverseWidth / LaneSize - 1;
9868 for (unsigned i = 0; i < Elements; i++)
9869 Indices.push_back(i ^ Mask);
9870 return Builder.CreateShuffleVector(V, Indices);
9871}
9872
9873 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9874 const CallExpr *E,
9875 ReturnValueSlot ReturnValue,
9876 llvm::Triple::ArchType Arch) {
9877 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9878 Intrinsic::ID IRIntr;
9879 unsigned NumVectors;
9880
9881 // Code autogenerated by Tablegen will handle all the simple builtins.
9882 switch (BuiltinID) {
9883 #include "clang/Basic/arm_mve_builtin_cg.inc"
9884
9885 // If we didn't match an MVE builtin id at all, go back to the
9886 // main EmitARMBuiltinExpr.
9887 default:
9888 return nullptr;
9889 }
9890
9891 // Anything that breaks from that switch is an MVE builtin that
9892 // needs handwritten code to generate.
9893
9894 switch (CustomCodeGenType) {
9895
9896 case CustomCodeGen::VLD24: {
9897 llvm::SmallVector<Value *, 4> Ops;
9898 llvm::SmallVector<llvm::Type *, 4> Tys;
9899
9900 auto MvecCType = E->getType();
9901 auto MvecLType = ConvertType(MvecCType);
9902 assert(MvecLType->isStructTy() &&
9903 "Return type for vld[24]q should be a struct");
9904 assert(MvecLType->getStructNumElements() == 1 &&
9905 "Return-type struct for vld[24]q should have one element");
9906 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9907 assert(MvecLTypeInner->isArrayTy() &&
9908 "Return-type struct for vld[24]q should contain an array");
9909 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9910 "Array member of return-type struct vld[24]q has wrong length");
9911 auto VecLType = MvecLTypeInner->getArrayElementType();
9912
9913 Tys.push_back(VecLType);
9914
9915 auto Addr = E->getArg(0);
9916 Ops.push_back(EmitScalarExpr(Addr));
9917 Tys.push_back(ConvertType(Addr->getType()));
9918
9919 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9920 Value *LoadResult = Builder.CreateCall(F, Ops);
9921 Value *MvecOut = PoisonValue::get(MvecLType);
9922 for (unsigned i = 0; i < NumVectors; ++i) {
9923 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9924 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9925 }
9926
9927 if (ReturnValue.isNull())
9928 return MvecOut;
9929 else
9930 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9931 }
9932
9933 case CustomCodeGen::VST24: {
9934 llvm::SmallVector<Value *, 4> Ops;
9935 llvm::SmallVector<llvm::Type *, 4> Tys;
9936
9937 auto Addr = E->getArg(0);
9938 Ops.push_back(EmitScalarExpr(Addr));
9939 Tys.push_back(ConvertType(Addr->getType()));
9940
9941 auto MvecCType = E->getArg(1)->getType();
9942 auto MvecLType = ConvertType(MvecCType);
9943 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9944 assert(MvecLType->getStructNumElements() == 1 &&
9945 "Data-type struct for vst2q should have one element");
9946 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9947 assert(MvecLTypeInner->isArrayTy() &&
9948 "Data-type struct for vst2q should contain an array");
9949 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9950 "Array member of return-type struct vld[24]q has wrong length");
9951 auto VecLType = MvecLTypeInner->getArrayElementType();
9952
9953 Tys.push_back(VecLType);
9954
9955 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9956 EmitAggExpr(E->getArg(1), MvecSlot);
9957 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9958 for (unsigned i = 0; i < NumVectors; i++)
9959 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9960
9961 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9962 Value *ToReturn = nullptr;
9963 for (unsigned i = 0; i < NumVectors; i++) {
9964 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9965 ToReturn = Builder.CreateCall(F, Ops);
9966 Ops.pop_back();
9967 }
9968 return ToReturn;
9969 }
9970 }
9971 llvm_unreachable("unknown custom codegen type.");
9972}
9973
9974 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9975 const CallExpr *E,
9976 ReturnValueSlot ReturnValue,
9977 llvm::Triple::ArchType Arch) {
9978 switch (BuiltinID) {
9979 default:
9980 return nullptr;
9981#include "clang/Basic/arm_cde_builtin_cg.inc"
9982 }
9983}
9984
9985static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9986 const CallExpr *E,
9987 SmallVectorImpl<Value *> &Ops,
9988 llvm::Triple::ArchType Arch) {
9989 unsigned int Int = 0;
9990 const char *s = nullptr;
9991
9992 switch (BuiltinID) {
9993 default:
9994 return nullptr;
9995 case NEON::BI__builtin_neon_vtbl1_v:
9996 case NEON::BI__builtin_neon_vqtbl1_v:
9997 case NEON::BI__builtin_neon_vqtbl1q_v:
9998 case NEON::BI__builtin_neon_vtbl2_v:
9999 case NEON::BI__builtin_neon_vqtbl2_v:
10000 case NEON::BI__builtin_neon_vqtbl2q_v:
10001 case NEON::BI__builtin_neon_vtbl3_v:
10002 case NEON::BI__builtin_neon_vqtbl3_v:
10003 case NEON::BI__builtin_neon_vqtbl3q_v:
10004 case NEON::BI__builtin_neon_vtbl4_v:
10005 case NEON::BI__builtin_neon_vqtbl4_v:
10006 case NEON::BI__builtin_neon_vqtbl4q_v:
10007 break;
10008 case NEON::BI__builtin_neon_vtbx1_v:
10009 case NEON::BI__builtin_neon_vqtbx1_v:
10010 case NEON::BI__builtin_neon_vqtbx1q_v:
10011 case NEON::BI__builtin_neon_vtbx2_v:
10012 case NEON::BI__builtin_neon_vqtbx2_v:
10013 case NEON::BI__builtin_neon_vqtbx2q_v:
10014 case NEON::BI__builtin_neon_vtbx3_v:
10015 case NEON::BI__builtin_neon_vqtbx3_v:
10016 case NEON::BI__builtin_neon_vqtbx3q_v:
10017 case NEON::BI__builtin_neon_vtbx4_v:
10018 case NEON::BI__builtin_neon_vqtbx4_v:
10019 case NEON::BI__builtin_neon_vqtbx4q_v:
10020 break;
10021 }
10022
10023 assert(E->getNumArgs() >= 3);
10024
10025 // Get the last argument, which specifies the vector type.
10026 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
10027 std::optional<llvm::APSInt> Result =
10028 Arg->getIntegerConstantExpr(CGF.getContext());
10029 if (!Result)
10030 return nullptr;
10031
10032 // Determine the type of this overloaded NEON intrinsic.
10033 NeonTypeFlags Type = Result->getZExtValue();
10034 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
10035 if (!Ty)
10036 return nullptr;
10037
10038 CodeGen::CGBuilderTy &Builder = CGF.Builder;
10039
10040 // AArch64 scalar builtins are not overloaded: they do not have an extra
10041 // argument that specifies the vector type, so each case is handled individually.
10042 switch (BuiltinID) {
10043 case NEON::BI__builtin_neon_vtbl1_v: {
10044 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
10045 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10046 }
10047 case NEON::BI__builtin_neon_vtbl2_v: {
10048 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
10049 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10050 }
10051 case NEON::BI__builtin_neon_vtbl3_v: {
10052 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
10053 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10054 }
10055 case NEON::BI__builtin_neon_vtbl4_v: {
10056 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
10057 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10058 }
10059 case NEON::BI__builtin_neon_vtbx1_v: {
10060 Value *TblRes =
10061 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
10062 Intrinsic::aarch64_neon_tbl1, "vtbl1");
10063
10064 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
10065 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
10066 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10067
10068 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10069 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10070 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10071 }
10072 case NEON::BI__builtin_neon_vtbx2_v: {
10073 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
10074 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
10075 }
10076 case NEON::BI__builtin_neon_vtbx3_v: {
10077 Value *TblRes =
10078 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
10079 Intrinsic::aarch64_neon_tbl2, "vtbl2");
10080
10081 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
10082 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
10083 TwentyFourV);
10084 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10085
10086 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10087 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10088 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10089 }
10090 case NEON::BI__builtin_neon_vtbx4_v: {
10091 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
10092 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
10093 }
10094 case NEON::BI__builtin_neon_vqtbl1_v:
10095 case NEON::BI__builtin_neon_vqtbl1q_v:
10096 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
10097 case NEON::BI__builtin_neon_vqtbl2_v:
10098 case NEON::BI__builtin_neon_vqtbl2q_v: {
10099 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
10100 case NEON::BI__builtin_neon_vqtbl3_v:
10101 case NEON::BI__builtin_neon_vqtbl3q_v:
10102 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
10103 case NEON::BI__builtin_neon_vqtbl4_v:
10104 case NEON::BI__builtin_neon_vqtbl4q_v:
10105 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
10106 case NEON::BI__builtin_neon_vqtbx1_v:
10107 case NEON::BI__builtin_neon_vqtbx1q_v:
10108 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
10109 case NEON::BI__builtin_neon_vqtbx2_v:
10110 case NEON::BI__builtin_neon_vqtbx2q_v:
10111 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
10112 case NEON::BI__builtin_neon_vqtbx3_v:
10113 case NEON::BI__builtin_neon_vqtbx3q_v:
10114 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
10115 case NEON::BI__builtin_neon_vqtbx4_v:
10116 case NEON::BI__builtin_neon_vqtbx4q_v:
10117 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
10118 }
10119 }
10120
10121 if (!Int)
10122 return nullptr;
10123
10124 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
10125 return CGF.EmitNeonCall(F, Ops, s);
10126}
10127
10128 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
10129 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
10130 Op = Builder.CreateBitCast(Op, Int16Ty);
10131 Value *V = PoisonValue::get(VTy);
10132 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
10133 Op = Builder.CreateInsertElement(V, Op, CI);
10134 return Op;
10135}
10136
10137/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10138/// access builtin. Only required if it can't be inferred from the base pointer
10139/// operand.
10140llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10141 switch (TypeFlags.getMemEltType()) {
10142 case SVETypeFlags::MemEltTyDefault:
10143 return getEltType(TypeFlags);
10144 case SVETypeFlags::MemEltTyInt8:
10145 return Builder.getInt8Ty();
10146 case SVETypeFlags::MemEltTyInt16:
10147 return Builder.getInt16Ty();
10148 case SVETypeFlags::MemEltTyInt32:
10149 return Builder.getInt32Ty();
10150 case SVETypeFlags::MemEltTyInt64:
10151 return Builder.getInt64Ty();
10152 }
10153 llvm_unreachable("Unknown MemEltType");
10154}
10155
10156llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10157 switch (TypeFlags.getEltType()) {
10158 default:
10159 llvm_unreachable("Invalid SVETypeFlag!");
10160
10161 case SVETypeFlags::EltTyInt8:
10162 return Builder.getInt8Ty();
10163 case SVETypeFlags::EltTyInt16:
10164 return Builder.getInt16Ty();
10165 case SVETypeFlags::EltTyInt32:
10166 return Builder.getInt32Ty();
10167 case SVETypeFlags::EltTyInt64:
10168 return Builder.getInt64Ty();
10169 case SVETypeFlags::EltTyInt128:
10170 return Builder.getInt128Ty();
10171
10172 case SVETypeFlags::EltTyFloat16:
10173 return Builder.getHalfTy();
10174 case SVETypeFlags::EltTyFloat32:
10175 return Builder.getFloatTy();
10176 case SVETypeFlags::EltTyFloat64:
10177 return Builder.getDoubleTy();
10178
10179 case SVETypeFlags::EltTyBFloat16:
10180 return Builder.getBFloatTy();
10181
10182 case SVETypeFlags::EltTyBool8:
10183 case SVETypeFlags::EltTyBool16:
10184 case SVETypeFlags::EltTyBool32:
10185 case SVETypeFlags::EltTyBool64:
10186 return Builder.getInt1Ty();
10187 }
10188}
10189
10190// Return the llvm predicate vector type corresponding to the specified element
10191// TypeFlags.
10192llvm::ScalableVectorType *
10193 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10194 switch (TypeFlags.getEltType()) {
10195 default: llvm_unreachable("Unhandled SVETypeFlag!");
10196
10197 case SVETypeFlags::EltTyInt8:
10198 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10199 case SVETypeFlags::EltTyInt16:
10200 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10201 case SVETypeFlags::EltTyInt32:
10202 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10203 case SVETypeFlags::EltTyInt64:
10204 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10205
10206 case SVETypeFlags::EltTyBFloat16:
10207 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10208 case SVETypeFlags::EltTyFloat16:
10209 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10210 case SVETypeFlags::EltTyFloat32:
10211 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10212 case SVETypeFlags::EltTyFloat64:
10213 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10214
10215 case SVETypeFlags::EltTyBool8:
10216 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10217 case SVETypeFlags::EltTyBool16:
10218 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10219 case SVETypeFlags::EltTyBool32:
10220 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10221 case SVETypeFlags::EltTyBool64:
10222 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10223 }
10224}
10225
10226// Return the llvm vector type corresponding to the specified element TypeFlags.
10227llvm::ScalableVectorType *
10228CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10229 switch (TypeFlags.getEltType()) {
10230 default:
10231 llvm_unreachable("Invalid SVETypeFlag!");
10232
10233 case SVETypeFlags::EltTyInt8:
10234 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10235 case SVETypeFlags::EltTyInt16:
10236 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10237 case SVETypeFlags::EltTyInt32:
10238 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10239 case SVETypeFlags::EltTyInt64:
10240 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10241
10242 case SVETypeFlags::EltTyMFloat8:
10243 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10244 case SVETypeFlags::EltTyFloat16:
10245 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10246 case SVETypeFlags::EltTyBFloat16:
10247 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10248 case SVETypeFlags::EltTyFloat32:
10249 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10250 case SVETypeFlags::EltTyFloat64:
10251 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10252
10253 case SVETypeFlags::EltTyBool8:
10254 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10255 case SVETypeFlags::EltTyBool16:
10256 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10257 case SVETypeFlags::EltTyBool32:
10258 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10259 case SVETypeFlags::EltTyBool64:
10260 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10261 }
10262}
10263
10264llvm::Value *
10265 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10266 Function *Ptrue =
10267 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10268 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10269}
10270
10271constexpr unsigned SVEBitsPerBlock = 128;
10272
10273static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10274 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10275 return llvm::ScalableVectorType::get(EltTy, NumElts);
10276}
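// Illustratively, with SVEBitsPerBlock == 128 this maps i8 to
// <vscale x 16 x i8> and double to <vscale x 2 x double>.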
10277
10278// Reinterpret the input predicate so that it can be used to correctly isolate
10279// the elements of the specified datatype.
10280 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10281 llvm::ScalableVectorType *VTy) {
10282
10283 if (isa<TargetExtType>(Pred->getType()) &&
10284 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10285 return Pred;
10286
10287 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10288 if (Pred->getType() == RTy)
10289 return Pred;
10290
10291 unsigned IntID;
10292 llvm::Type *IntrinsicTy;
10293 switch (VTy->getMinNumElements()) {
10294 default:
10295 llvm_unreachable("unsupported element count!");
10296 case 1:
10297 case 2:
10298 case 4:
10299 case 8:
10300 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10301 IntrinsicTy = RTy;
10302 break;
10303 case 16:
10304 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10305 IntrinsicTy = Pred->getType();
10306 break;
10307 }
10308
10309 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10310 Value *C = Builder.CreateCall(F, Pred);
10311 assert(C->getType() == RTy && "Unexpected return type!");
10312 return C;
10313}
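// Illustrative example: an svbool_t predicate arrives as <vscale x 16 x i1>;
// for a <vscale x 2 x i64> data type the required predicate type is
// <vscale x 2 x i1>, so the cast above is emitted as a call to
// llvm.aarch64.sve.convert.from.svbool with that narrower result type.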
10314
10315 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10316 llvm::StructType *Ty) {
10317 if (PredTuple->getType() == Ty)
10318 return PredTuple;
10319
10320 Value *Ret = llvm::PoisonValue::get(Ty);
10321 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10322 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10323 Pred = EmitSVEPredicateCast(
10324 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10325 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10326 }
10327
10328 return Ret;
10329}
10330
10331 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10332 SmallVectorImpl<Value *> &Ops,
10333 unsigned IntID) {
10334 auto *ResultTy = getSVEType(TypeFlags);
10335 auto *OverloadedTy =
10336 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10337
10338 Function *F = nullptr;
10339 if (Ops[1]->getType()->isVectorTy())
10340 // This is the "vector base, scalar offset" case. In order to uniquely
10341 // map this built-in to an LLVM IR intrinsic, we need both the return type
10342 // and the type of the vector base.
10343 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10344 else
10345 // This is the "scalar base, vector offset case". The type of the offset
10346 // is encoded in the name of the intrinsic. We only need to specify the
10347 // return type in order to uniquely map this built-in to an LLVM IR
10348 // intrinsic.
10349 F = CGM.getIntrinsic(IntID, OverloadedTy);
10350
10351 // At the ACLE level there's only one predicate type, svbool_t, which is
10352 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10353 // actual type being loaded. For example, when loading doubles (i64) the
10354 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10355 // the predicate and the data being loaded must match. Cast to the type
10356 // expected by the intrinsic. The intrinsic itself should be defined in
10357 // a way that enforces relations between parameter types.
10358 Ops[0] = EmitSVEPredicateCast(
10359 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10360
10361 // Pass 0 when the offset is missing. This can only be applied when using
10362 // the "vector base" addressing mode for which ACLE allows no offset. The
10363 // corresponding LLVM IR always requires an offset.
10364 if (Ops.size() == 2) {
10365 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10366 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10367 }
10368
10369 // For "vector base, scalar index" scale the index so that it becomes a
10370 // scalar offset.
10371 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10372 unsigned BytesPerElt =
10373 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10374 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10375 }
10376
10377 Value *Call = Builder.CreateCall(F, Ops);
10378
10379 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10380 // other cases it's folded into a nop.
10381 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10382 : Builder.CreateSExt(Call, ResultTy);
10383}
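// Illustrative example, assuming a gather builtin that zero-extends i8 data
// into a 64-bit element result: OverloadedTy is <vscale x 2 x i8>, the
// predicate is narrowed to <vscale x 2 x i1>, and the final zext above
// widens the call result to the <vscale x 2 x i64> register type.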
10384
10385 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10386 SmallVectorImpl<Value *> &Ops,
10387 unsigned IntID) {
10388 auto *SrcDataTy = getSVEType(TypeFlags);
10389 auto *OverloadedTy =
10390 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10391
10392 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10393 // it's the first argument. Move it accordingly.
10394 Ops.insert(Ops.begin(), Ops.pop_back_val());
10395
10396 Function *F = nullptr;
10397 if (Ops[2]->getType()->isVectorTy())
10398 // This is the "vector base, scalar offset" case. In order to uniquely
10399 // map this built-in to an LLVM IR intrinsic, we need both the return type
10400 // and the type of the vector base.
10401 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10402 else
10403 // This is the "scalar base, vector offset case". The type of the offset
10404 // is encoded in the name of the intrinsic. We only need to specify the
10405 // return type in order to uniquely map this built-in to an LLVM IR
10406 // intrinsic.
10407 F = CGM.getIntrinsic(IntID, OverloadedTy);
10408
10409 // Pass 0 when the offset is missing. This can only be applied when using
10410 // the "vector base" addressing mode for which ACLE allows no offset. The
10411 // corresponding LLVM IR always requires an offset.
10412 if (Ops.size() == 3) {
10413 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10414 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10415 }
10416
10417 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10418 // folded into a nop.
10419 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10420
10421 // At the ACLE level there's only one predicate type, svbool_t, which is
10422 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10423 // actual type being stored. For example, when storing doubles (i64) the
10424 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10425 // the predicate and the data being stored must match. Cast to the type
10426 // expected by the intrinsic. The intrinsic itself should be defined in
10427 // a way that enforces relations between parameter types.
10428 Ops[1] = EmitSVEPredicateCast(
10429 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10430
10431 // For "vector base, scalar index" scale the index so that it becomes a
10432 // scalar offset.
10433 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10434 unsigned BytesPerElt =
10435 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10436 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10437 }
10438
10439 return Builder.CreateCall(F, Ops);
10440}
10441
10442 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10443 SmallVectorImpl<Value *> &Ops,
10444 unsigned IntID) {
10445 // The gather prefetches are overloaded on the vector input - this can either
10446 // be the vector of base addresses or vector of offsets.
10447 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10448 if (!OverloadedTy)
10449 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10450
10451 // Cast the predicate from svbool_t to the right number of elements.
10452 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10453
10454 // vector + imm addressing modes
10455 if (Ops[1]->getType()->isVectorTy()) {
10456 if (Ops.size() == 3) {
10457 // Pass 0 for 'vector+imm' when the index is omitted.
10458 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10459
10460 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10461 std::swap(Ops[2], Ops[3]);
10462 } else {
10463 // Index needs to be passed as scaled offset.
10464 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10465 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10466 if (BytesPerElt > 1)
10467 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10468 }
10469 }
10470
10471 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10472 return Builder.CreateCall(F, Ops);
10473}
10474
10475 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10476 SmallVectorImpl<Value *> &Ops,
10477 unsigned IntID) {
10478 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10479 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10480 Value *BasePtr = Ops[1];
10481
10482 // Does the load have an offset?
10483 if (Ops.size() > 2)
10484 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10485
10486 Function *F = CGM.getIntrinsic(IntID, {VTy});
10487 return Builder.CreateCall(F, {Predicate, BasePtr});
10488}
10489
10490 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10491 SmallVectorImpl<Value *> &Ops,
10492 unsigned IntID) {
10493 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10494
10495 unsigned N;
10496 switch (IntID) {
10497 case Intrinsic::aarch64_sve_st2:
10498 case Intrinsic::aarch64_sve_st1_pn_x2:
10499 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10500 case Intrinsic::aarch64_sve_st2q:
10501 N = 2;
10502 break;
10503 case Intrinsic::aarch64_sve_st3:
10504 case Intrinsic::aarch64_sve_st3q:
10505 N = 3;
10506 break;
10507 case Intrinsic::aarch64_sve_st4:
10508 case Intrinsic::aarch64_sve_st1_pn_x4:
10509 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10510 case Intrinsic::aarch64_sve_st4q:
10511 N = 4;
10512 break;
10513 default:
10514 llvm_unreachable("unknown intrinsic!");
10515 }
10516
10517 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10518 Value *BasePtr = Ops[1];
10519
10520 // Does the store have an offset?
10521 if (Ops.size() > (2 + N))
10522 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10523
10524 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10525 // need to break up the tuple vector.
10526 SmallVector<llvm::Value*, 5> Operands;
10527 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10528 Operands.push_back(Ops[I]);
10529 Operands.append({Predicate, BasePtr});
10530 Function *F = CGM.getIntrinsic(IntID, { VTy });
10531
10532 return Builder.CreateCall(F, Operands);
10533}
10534
10535// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10536// svpmullt_pair intrinsics, with the exception that their results are bitcast
10537// to a wider type.
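// For example (an illustrative reading of the code below): for a builtin
// producing a 64-bit element result from 32-bit operands, the pair intrinsic
// is emitted on <vscale x 4 x i32> and its result is then reinterpreted
// (bitcast) to <vscale x 2 x i64>.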
10538 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10539 SmallVectorImpl<Value *> &Ops,
10540 unsigned BuiltinID) {
10541 // Splat scalar operand to vector (intrinsics with _n infix)
10542 if (TypeFlags.hasSplatOperand()) {
10543 unsigned OpNo = TypeFlags.getSplatOperand();
10544 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10545 }
10546
10547 // The pair-wise function has a narrower overloaded type.
10548 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10549 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10550
10551 // Now bitcast to the wider result type.
10552 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10553 return EmitSVEReinterpret(Call, Ty);
10554}
10555
10556 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10557 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10558 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10559 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10560 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10561}
10562
10563 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10564 SmallVectorImpl<Value *> &Ops,
10565 unsigned BuiltinID) {
10566 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10567 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10568 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10569
10570 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10571 Value *BasePtr = Ops[1];
10572
10573 // Implement the index operand if not omitted.
10574 if (Ops.size() > 3)
10575 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10576
10577 Value *PrfOp = Ops.back();
10578
10579 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10580 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10581}
10582
10583 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10584 llvm::Type *ReturnTy,
10585 SmallVectorImpl<Value *> &Ops,
10586 unsigned IntrinsicID,
10587 bool IsZExtReturn) {
10588 QualType LangPTy = E->getArg(1)->getType();
10589 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10590 LangPTy->castAs<PointerType>()->getPointeeType());
10591
10592 // The vector type that is returned may be different from the
10593 // eventual type loaded from memory.
10594 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10595 llvm::ScalableVectorType *MemoryTy = nullptr;
10596 llvm::ScalableVectorType *PredTy = nullptr;
10597 bool IsQuadLoad = false;
10598 switch (IntrinsicID) {
10599 case Intrinsic::aarch64_sve_ld1uwq:
10600 case Intrinsic::aarch64_sve_ld1udq:
10601 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10602 PredTy = llvm::ScalableVectorType::get(
10603 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10604 IsQuadLoad = true;
10605 break;
10606 default:
10607 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10608 PredTy = MemoryTy;
10609 break;
10610 }
10611
10612 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10613 Value *BasePtr = Ops[1];
10614
10615 // Does the load have an offset?
10616 if (Ops.size() > 2)
10617 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10618
10619 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10620 auto *Load =
10621 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10622 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10623 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10624
10625 if (IsQuadLoad)
10626 return Load;
10627
10628 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10629 : Builder.CreateSExt(Load, VectorTy);
10630}
10631
10632 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10633 SmallVectorImpl<Value *> &Ops,
10634 unsigned IntrinsicID) {
10635 QualType LangPTy = E->getArg(1)->getType();
10636 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10637 LangPTy->castAs<PointerType>()->getPointeeType());
10638
10639 // The vector type that is stored may be different from the
10640 // eventual type stored to memory.
10641 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10642 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10643
10644 auto PredTy = MemoryTy;
10645 auto AddrMemoryTy = MemoryTy;
10646 bool IsQuadStore = false;
10647
10648 switch (IntrinsicID) {
10649 case Intrinsic::aarch64_sve_st1wq:
10650 case Intrinsic::aarch64_sve_st1dq:
10651 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10652 PredTy =
10653 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10654 IsQuadStore = true;
10655 break;
10656 default:
10657 break;
10658 }
10659 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10660 Value *BasePtr = Ops[1];
10661
10662 // Does the store have an offset?
10663 if (Ops.size() == 4)
10664 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10665
10666 // Last value is always the data
10667 Value *Val =
10668 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10669
10670 Function *F =
10671 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10672 auto *Store =
10673 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10674 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10675 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10676 return Store;
10677}
10678
10679 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10680 SmallVectorImpl<Value *> &Ops,
10681 unsigned IntID) {
10682 Ops[2] = EmitSVEPredicateCast(
10683 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10684
10685 SmallVector<Value *> NewOps;
10686 NewOps.push_back(Ops[2]);
10687
10688 llvm::Value *BasePtr = Ops[3];
10689 llvm::Value *RealSlice = Ops[1];
10690 // If the intrinsic contains the vnum parameter, multiply it by the vector
10691 // size in bytes.
10692 if (Ops.size() == 5) {
10693 Function *StreamingVectorLength =
10694 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10695 llvm::Value *StreamingVectorLengthCall =
10696 Builder.CreateCall(StreamingVectorLength);
10697 llvm::Value *Mulvl =
10698 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10699 // The type of the ptr parameter is void *, so use Int8Ty here.
10700 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10701 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10702 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10703 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10704 }
10705 NewOps.push_back(BasePtr);
10706 NewOps.push_back(Ops[0]);
10707 NewOps.push_back(RealSlice);
10708 Function *F = CGM.getIntrinsic(IntID);
10709 return Builder.CreateCall(F, NewOps);
10710}
10711
10712 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10713 SmallVectorImpl<Value *> &Ops,
10714 unsigned IntID) {
10715 auto *VecTy = getSVEType(TypeFlags);
10716 Function *F = CGM.getIntrinsic(IntID, VecTy);
10717 if (TypeFlags.isReadZA())
10718 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10719 else if (TypeFlags.isWriteZA())
10720 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10721 return Builder.CreateCall(F, Ops);
10722}
10723
10724 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10725 SmallVectorImpl<Value *> &Ops,
10726 unsigned IntID) {
10727 // svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
10728 if (Ops.size() == 0)
10729 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10730 Function *F = CGM.getIntrinsic(IntID, {});
10731 return Builder.CreateCall(F, Ops);
10732}
10733
10734 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10735 SmallVectorImpl<Value *> &Ops,
10736 unsigned IntID) {
10737 if (Ops.size() == 2)
10738 Ops.push_back(Builder.getInt32(0));
10739 else
10740 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10741 Function *F = CGM.getIntrinsic(IntID, {});
10742 return Builder.CreateCall(F, Ops);
10743}
10744
10745// Limit the usage of scalable llvm IR generated by the ACLE by using the
10746// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10747Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10748 return Builder.CreateVectorSplat(
10749 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10750}
10751
10752 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10753 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10754#ifndef NDEBUG
10755 auto *VecTy = cast<llvm::VectorType>(Ty);
10756 ElementCount EC = VecTy->getElementCount();
10757 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10758 "Only <1 x i8> expected");
10759#endif
10760 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10761 }
10762 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10763}
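// Illustratively, splatting an i32 scalar through this overload yields a
// <vscale x 4 x i32> splat, since getSVEVectorForElementType sizes the
// vector to a 128-bit block.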
10764
10765Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10766 // FIXME: For big endian this needs an additional REV, or needs a separate
10767 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10768 // instruction is defined as 'bitwise' equivalent from memory point of
10769 // view (when storing/reloading), whereas the svreinterpret builtin
10770 // implements bitwise equivalent cast from register point of view.
10771 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10772
10773 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10774 Value *Tuple = llvm::PoisonValue::get(Ty);
10775
10776 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10777 Value *In = Builder.CreateExtractValue(Val, I);
10778 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10779 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10780 }
10781
10782 return Tuple;
10783 }
10784
10785 return Builder.CreateBitCast(Val, Ty);
10786}
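// Illustrative examples: reinterpreting between <vscale x 8 x i16> and
// <vscale x 16 x i8> is a single bitcast, while for tuple (struct) types each
// member is bitcast individually and the tuple is rebuilt with insertvalue,
// as above.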
10787
10788static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10789 SmallVectorImpl<Value *> &Ops) {
10790 auto *SplatZero = Constant::getNullValue(Ty);
10791 Ops.insert(Ops.begin(), SplatZero);
10792}
10793
10794static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10795 SmallVectorImpl<Value *> &Ops) {
10796 auto *SplatUndef = UndefValue::get(Ty);
10797 Ops.insert(Ops.begin(), SplatUndef);
10798}
10799
10800 SmallVector<llvm::Type *, 2>
10801 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10802 llvm::Type *ResultType,
10803 ArrayRef<Value *> Ops) {
10804 if (TypeFlags.isOverloadNone())
10805 return {};
10806
10807 llvm::Type *DefaultType = getSVEType(TypeFlags);
10808
10809 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10810 return {DefaultType, Ops[1]->getType()};
10811
10812 if (TypeFlags.isOverloadWhileRW())
10813 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10814
10815 if (TypeFlags.isOverloadCvt())
10816 return {Ops[0]->getType(), Ops.back()->getType()};
10817
10818 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10819 ResultType->isVectorTy())
10820 return {ResultType, Ops[1]->getType()};
10821
10822 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10823 return {DefaultType};
10824}
10825
10826 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10827 ArrayRef<Value *> Ops) {
10828 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10829 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10830 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10831
10832 if (TypeFlags.isTupleSet())
10833 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10834 return Builder.CreateExtractValue(Ops[0], Idx);
10835}
10836
10837 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10838 llvm::Type *Ty,
10839 ArrayRef<Value *> Ops) {
10840 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10841
10842 Value *Tuple = llvm::PoisonValue::get(Ty);
10843 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10844 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10845
10846 return Tuple;
10847}
10848
10849 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10850 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10851 SVETypeFlags TypeFlags) {
10852 // Find out if any arguments are required to be integer constant expressions.
10853 unsigned ICEArguments = 0;
10854 ASTContext::GetBuiltinTypeError Error;
10855 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10856 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10857
10858 // Tuple set/get only requires one insert/extract vector, which is
10859 // created by EmitSVETupleSetOrGet.
10860 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10861
10862 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10863 bool IsICE = ICEArguments & (1 << i);
10864 Value *Arg = EmitScalarExpr(E->getArg(i));
10865
10866 if (IsICE) {
10867 // If this is required to be a constant, constant fold it so that we know
10868 // that the generated intrinsic gets a ConstantInt.
10869 std::optional<llvm::APSInt> Result =
10870 E->getArg(i)->getIntegerConstantExpr(getContext());
10871 assert(Result && "Expected argument to be a constant");
10872
10873 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10874 // truncate because the immediate has been range checked and no valid
10875 // immediate requires more than a handful of bits.
10876 *Result = Result->extOrTrunc(32);
10877 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10878 continue;
10879 }
10880
10881 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10882 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10883 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10884
10885 continue;
10886 }
10887
10888 Ops.push_back(Arg);
10889 }
10890}
10891
10892 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10893 const CallExpr *E) {
10894 llvm::Type *Ty = ConvertType(E->getType());
10895 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10896 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10897 Value *Val = EmitScalarExpr(E->getArg(0));
10898 return EmitSVEReinterpret(Val, Ty);
10899 }
10900
10901 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10902 AArch64SVEIntrinsicsProvenSorted);
10903
10904 llvm::SmallVector<Value *, 4> Ops;
10905 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10906 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10907
10908 if (TypeFlags.isLoad())
10909 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10910 TypeFlags.isZExtReturn());
10911 else if (TypeFlags.isStore())
10912 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10913 else if (TypeFlags.isGatherLoad())
10914 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10915 else if (TypeFlags.isScatterStore())
10916 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10917 else if (TypeFlags.isPrefetch())
10918 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10919 else if (TypeFlags.isGatherPrefetch())
10920 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10921 else if (TypeFlags.isStructLoad())
10922 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10923 else if (TypeFlags.isStructStore())
10924 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10925 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10926 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10927 else if (TypeFlags.isTupleCreate())
10928 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10929 else if (TypeFlags.isUndef())
10930 return UndefValue::get(Ty);
10931 else if (Builtin->LLVMIntrinsic != 0) {
10932 // Emit set FPMR for intrinsics that require it
10933 if (TypeFlags.setsFPMR())
10934 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10935 Ops.pop_back_val());
10936 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10937 InsertExplicitZeroOperand(Builder, Ty, Ops);
10938
10939 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10940 InsertExplicitUndefOperand(Builder, Ty, Ops);
10941
10942 // Some ACLE builtins leave out the argument to specify the predicate
10943 // pattern, which is expected to be expanded to an SV_ALL pattern.
10944 if (TypeFlags.isAppendSVALL())
10945 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10946 if (TypeFlags.isInsertOp1SVALL())
10947 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10948
10949 // Predicates must match the main datatype.
10950 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10951 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10952 if (PredTy->getElementType()->isIntegerTy(1))
10953 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10954
10955 // Splat scalar operand to vector (intrinsics with _n infix)
10956 if (TypeFlags.hasSplatOperand()) {
10957 unsigned OpNo = TypeFlags.getSplatOperand();
10958 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10959 }
10960
10961 if (TypeFlags.isReverseCompare())
10962 std::swap(Ops[1], Ops[2]);
10963 else if (TypeFlags.isReverseUSDOT())
10964 std::swap(Ops[1], Ops[2]);
10965 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10966 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10967 std::swap(Ops[1], Ops[2]);
10968 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10969 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10970 std::swap(Ops[1], Ops[3]);
10971
10972 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10973 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10974 llvm::Type *OpndTy = Ops[1]->getType();
10975 auto *SplatZero = Constant::getNullValue(OpndTy);
10976 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10977 }
10978
10979 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10980 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10981 Value *Call = Builder.CreateCall(F, Ops);
10982
10983 if (Call->getType() == Ty)
10984 return Call;
10985
10986 // Predicate results must be converted to svbool_t.
10987 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10988 return EmitSVEPredicateCast(Call, PredTy);
10989 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10990 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10991
10992 llvm_unreachable("unsupported element count!");
10993 }
10994
10995 switch (BuiltinID) {
10996 default:
10997 return nullptr;
10998
10999 case SVE::BI__builtin_sve_svreinterpret_b: {
11000 auto SVCountTy =
11001 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11002 Function *CastFromSVCountF =
11003 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11004 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
11005 }
11006 case SVE::BI__builtin_sve_svreinterpret_c: {
11007 auto SVCountTy =
11008 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11009 Function *CastToSVCountF =
11010 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11011 return Builder.CreateCall(CastToSVCountF, Ops[0]);
11012 }
11013
11014 case SVE::BI__builtin_sve_svpsel_lane_b8:
11015 case SVE::BI__builtin_sve_svpsel_lane_b16:
11016 case SVE::BI__builtin_sve_svpsel_lane_b32:
11017 case SVE::BI__builtin_sve_svpsel_lane_b64:
11018 case SVE::BI__builtin_sve_svpsel_lane_c8:
11019 case SVE::BI__builtin_sve_svpsel_lane_c16:
11020 case SVE::BI__builtin_sve_svpsel_lane_c32:
11021 case SVE::BI__builtin_sve_svpsel_lane_c64: {
11022 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
11023 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
11024 "aarch64.svcount")) &&
11025 "Unexpected TargetExtType");
11026 auto SVCountTy =
11027 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11028 Function *CastFromSVCountF =
11029 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11030 Function *CastToSVCountF =
11031 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11032
11033 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
11034 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
11035 llvm::Value *Ops0 =
11036 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
11037 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
11038 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
11039 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
11040 }
11041 case SVE::BI__builtin_sve_svmov_b_z: {
11042 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
11043 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11044 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11045 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
11046 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
11047 }
11048
11049 case SVE::BI__builtin_sve_svnot_b_z: {
11050 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
11051 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11052 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11053 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
11054 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
11055 }
11056
11057 case SVE::BI__builtin_sve_svmovlb_u16:
11058 case SVE::BI__builtin_sve_svmovlb_u32:
11059 case SVE::BI__builtin_sve_svmovlb_u64:
11060 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
11061
11062 case SVE::BI__builtin_sve_svmovlb_s16:
11063 case SVE::BI__builtin_sve_svmovlb_s32:
11064 case SVE::BI__builtin_sve_svmovlb_s64:
11065 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
11066
11067 case SVE::BI__builtin_sve_svmovlt_u16:
11068 case SVE::BI__builtin_sve_svmovlt_u32:
11069 case SVE::BI__builtin_sve_svmovlt_u64:
11070 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
11071
11072 case SVE::BI__builtin_sve_svmovlt_s16:
11073 case SVE::BI__builtin_sve_svmovlt_s32:
11074 case SVE::BI__builtin_sve_svmovlt_s64:
11075 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
11076
11077 case SVE::BI__builtin_sve_svpmullt_u16:
11078 case SVE::BI__builtin_sve_svpmullt_u64:
11079 case SVE::BI__builtin_sve_svpmullt_n_u16:
11080 case SVE::BI__builtin_sve_svpmullt_n_u64:
11081 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
11082
11083 case SVE::BI__builtin_sve_svpmullb_u16:
11084 case SVE::BI__builtin_sve_svpmullb_u64:
11085 case SVE::BI__builtin_sve_svpmullb_n_u16:
11086 case SVE::BI__builtin_sve_svpmullb_n_u64:
11087 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
11088
11089 case SVE::BI__builtin_sve_svdup_n_b8:
11090 case SVE::BI__builtin_sve_svdup_n_b16:
11091 case SVE::BI__builtin_sve_svdup_n_b32:
11092 case SVE::BI__builtin_sve_svdup_n_b64: {
11093 Value *CmpNE =
11094 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
11095 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
11096 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
11097 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
11098 }
11099
11100 case SVE::BI__builtin_sve_svdupq_n_b8:
11101 case SVE::BI__builtin_sve_svdupq_n_b16:
11102 case SVE::BI__builtin_sve_svdupq_n_b32:
11103 case SVE::BI__builtin_sve_svdupq_n_b64:
11104 case SVE::BI__builtin_sve_svdupq_n_u8:
11105 case SVE::BI__builtin_sve_svdupq_n_s8:
11106 case SVE::BI__builtin_sve_svdupq_n_u64:
11107 case SVE::BI__builtin_sve_svdupq_n_f64:
11108 case SVE::BI__builtin_sve_svdupq_n_s64:
11109 case SVE::BI__builtin_sve_svdupq_n_u16:
11110 case SVE::BI__builtin_sve_svdupq_n_f16:
11111 case SVE::BI__builtin_sve_svdupq_n_bf16:
11112 case SVE::BI__builtin_sve_svdupq_n_s16:
11113 case SVE::BI__builtin_sve_svdupq_n_u32:
11114 case SVE::BI__builtin_sve_svdupq_n_f32:
11115 case SVE::BI__builtin_sve_svdupq_n_s32: {
11116 // These builtins are implemented by building a fixed-length vector from the
11117 // scalar operands and replicating it across the SVE vector with dupq_lane.
11118 unsigned NumOpnds = Ops.size();
11119
11120 bool IsBoolTy =
11121 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
11122
11123 // For svdupq_n_b* the element type is an integer of width 128/numelts,
11124 // so that the compare can use the width that is natural for the expected
11125 // number of predicate lanes.
11126 llvm::Type *EltTy = Ops[0]->getType();
11127 if (IsBoolTy)
11128 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
11129
11130 SmallVector<llvm::Value *, 16> VecOps;
11131 for (unsigned I = 0; I < NumOpnds; ++I)
11132 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
11133 Value *Vec = BuildVector(VecOps);
11134
11135 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
11136 Value *InsertSubVec = Builder.CreateInsertVector(
11137 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
11138
11139 Function *F =
11140 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11141 Value *DupQLane =
11142 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11143
11144 if (!IsBoolTy)
11145 return DupQLane;
11146
11147 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11148 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11149
11150 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11151 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11152 : Intrinsic::aarch64_sve_cmpne_wide,
11153 OverloadedTy);
11154 Value *Call = Builder.CreateCall(
11155 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11156 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11157 }
11158
11159 case SVE::BI__builtin_sve_svpfalse_b:
11160 return ConstantInt::getFalse(Ty);
11161
11162 case SVE::BI__builtin_sve_svpfalse_c: {
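    // An all-false svcount_t is materialized by reinterpreting an all-false
    // svbool_t through convert_from_svbool.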
11163 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11164 Function *CastToSVCountF =
11165 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11166 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11167 }
11168
11169 case SVE::BI__builtin_sve_svlen_bf16:
11170 case SVE::BI__builtin_sve_svlen_f16:
11171 case SVE::BI__builtin_sve_svlen_f32:
11172 case SVE::BI__builtin_sve_svlen_f64:
11173 case SVE::BI__builtin_sve_svlen_s8:
11174 case SVE::BI__builtin_sve_svlen_s16:
11175 case SVE::BI__builtin_sve_svlen_s32:
11176 case SVE::BI__builtin_sve_svlen_s64:
11177 case SVE::BI__builtin_sve_svlen_u8:
11178 case SVE::BI__builtin_sve_svlen_u16:
11179 case SVE::BI__builtin_sve_svlen_u32:
11180 case SVE::BI__builtin_sve_svlen_u64: {
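    // svlen: the runtime element count is the type's known-minimum lane count
    // multiplied by llvm.vscale.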
11181 SVETypeFlags TF(Builtin->TypeModifier);
11182 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11183 auto *NumEls =
11184 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11185
11186 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11187 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11188 }
11189
11190 case SVE::BI__builtin_sve_svtbl2_u8:
11191 case SVE::BI__builtin_sve_svtbl2_s8:
11192 case SVE::BI__builtin_sve_svtbl2_u16:
11193 case SVE::BI__builtin_sve_svtbl2_s16:
11194 case SVE::BI__builtin_sve_svtbl2_u32:
11195 case SVE::BI__builtin_sve_svtbl2_s32:
11196 case SVE::BI__builtin_sve_svtbl2_u64:
11197 case SVE::BI__builtin_sve_svtbl2_s64:
11198 case SVE::BI__builtin_sve_svtbl2_f16:
11199 case SVE::BI__builtin_sve_svtbl2_bf16:
11200 case SVE::BI__builtin_sve_svtbl2_f32:
11201 case SVE::BI__builtin_sve_svtbl2_f64: {
11202 SVETypeFlags TF(Builtin->TypeModifier);
11203 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11204 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11205 return Builder.CreateCall(F, Ops);
11206 }
11207
11208 case SVE::BI__builtin_sve_svset_neonq_s8:
11209 case SVE::BI__builtin_sve_svset_neonq_s16:
11210 case SVE::BI__builtin_sve_svset_neonq_s32:
11211 case SVE::BI__builtin_sve_svset_neonq_s64:
11212 case SVE::BI__builtin_sve_svset_neonq_u8:
11213 case SVE::BI__builtin_sve_svset_neonq_u16:
11214 case SVE::BI__builtin_sve_svset_neonq_u32:
11215 case SVE::BI__builtin_sve_svset_neonq_u64:
11216 case SVE::BI__builtin_sve_svset_neonq_f16:
11217 case SVE::BI__builtin_sve_svset_neonq_f32:
11218 case SVE::BI__builtin_sve_svset_neonq_f64:
11219 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11220 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11221 }
11222
11223 case SVE::BI__builtin_sve_svget_neonq_s8:
11224 case SVE::BI__builtin_sve_svget_neonq_s16:
11225 case SVE::BI__builtin_sve_svget_neonq_s32:
11226 case SVE::BI__builtin_sve_svget_neonq_s64:
11227 case SVE::BI__builtin_sve_svget_neonq_u8:
11228 case SVE::BI__builtin_sve_svget_neonq_u16:
11229 case SVE::BI__builtin_sve_svget_neonq_u32:
11230 case SVE::BI__builtin_sve_svget_neonq_u64:
11231 case SVE::BI__builtin_sve_svget_neonq_f16:
11232 case SVE::BI__builtin_sve_svget_neonq_f32:
11233 case SVE::BI__builtin_sve_svget_neonq_f64:
11234 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11235 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11236 }
11237
11238 case SVE::BI__builtin_sve_svdup_neonq_s8:
11239 case SVE::BI__builtin_sve_svdup_neonq_s16:
11240 case SVE::BI__builtin_sve_svdup_neonq_s32:
11241 case SVE::BI__builtin_sve_svdup_neonq_s64:
11242 case SVE::BI__builtin_sve_svdup_neonq_u8:
11243 case SVE::BI__builtin_sve_svdup_neonq_u16:
11244 case SVE::BI__builtin_sve_svdup_neonq_u32:
11245 case SVE::BI__builtin_sve_svdup_neonq_u64:
11246 case SVE::BI__builtin_sve_svdup_neonq_f16:
11247 case SVE::BI__builtin_sve_svdup_neonq_f32:
11248 case SVE::BI__builtin_sve_svdup_neonq_f64:
11249 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
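    // Insert the 128-bit NEON vector into element 0 of an SVE vector, then
    // replicate quadword 0 across the whole vector with dupq_lane.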
11250 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11251 Builder.getInt64(0));
11252 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11253 {Insert, Builder.getInt64(0)});
11254 }
11255 }
11256
11257 /// Should not happen
11258 return nullptr;
11259}
11260
11261static void swapCommutativeSMEOperands(unsigned BuiltinID,
11262                                        SmallVectorImpl<Value *> &Ops) {
11263 unsigned MultiVec;
11264 switch (BuiltinID) {
11265 default:
11266 return;
11267 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11268 MultiVec = 1;
11269 break;
11270 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11271 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11272 MultiVec = 2;
11273 break;
11274 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11275 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11276 MultiVec = 4;
11277 break;
11278 }
11279
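  // Swap the group of MultiVec vector operands starting at Ops[1] with the
  // group of MultiVec operands that follows it.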
11280 if (MultiVec > 0)
11281 for (unsigned I = 0; I < MultiVec; ++I)
11282 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11283}
11284
11285Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11286                                                  const CallExpr *E) {
11287 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11288                                              AArch64SMEIntrinsicsProvenSorted);
11289
11290 llvm::SmallVector<Value *, 4> Ops;
11291 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11292 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11293
11294 if (TypeFlags.isLoad() || TypeFlags.isStore())
11295 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11296 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11297 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11298 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11299 BuiltinID == SME::BI__builtin_sme_svzero_za)
11300 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11301 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11302 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11303 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11304 BuiltinID == SME::BI__builtin_sme_svstr_za)
11305 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11306
11307 // Emit set FPMR for intrinsics that require it
11308 if (TypeFlags.setsFPMR())
11309 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11310 Ops.pop_back_val());
11311 // Handle builtins which require their multi-vector operands to be swapped
11312 swapCommutativeSMEOperands(BuiltinID, Ops);
11313
11314 // Should not happen!
11315 if (Builtin->LLVMIntrinsic == 0)
11316 return nullptr;
11317
11318 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11319 // If we already know the streaming mode, don't bother with the intrinsic
11320 // and emit a constant instead
11321 const auto *FD = cast<FunctionDecl>(CurFuncDecl);
11322 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
11323 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11324 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
11325 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
11326 return ConstantInt::getBool(Builder.getContext(), IsStreaming);
11327 }
11328 }
11329 }
11330
11331 // Predicates must match the main datatype.
11332 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11333 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11334 if (PredTy->getElementType()->isIntegerTy(1))
11335 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11336
11337 Function *F =
11338 TypeFlags.isOverloadNone()
11339 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11340 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11341
11342 return Builder.CreateCall(F, Ops);
11343}
11344
11345Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11346                                               const CallExpr *E,
11347 llvm::Triple::ArchType Arch) {
11348 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11349 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11350 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11351
11352 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11353 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11354 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11355
11356 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11357 return EmitAArch64CpuSupports(E);
11358
11359 unsigned HintID = static_cast<unsigned>(-1);
11360 switch (BuiltinID) {
11361 default: break;
11362 case clang::AArch64::BI__builtin_arm_nop:
11363 HintID = 0;
11364 break;
11365 case clang::AArch64::BI__builtin_arm_yield:
11366 case clang::AArch64::BI__yield:
11367 HintID = 1;
11368 break;
11369 case clang::AArch64::BI__builtin_arm_wfe:
11370 case clang::AArch64::BI__wfe:
11371 HintID = 2;
11372 break;
11373 case clang::AArch64::BI__builtin_arm_wfi:
11374 case clang::AArch64::BI__wfi:
11375 HintID = 3;
11376 break;
11377 case clang::AArch64::BI__builtin_arm_sev:
11378 case clang::AArch64::BI__sev:
11379 HintID = 4;
11380 break;
11381 case clang::AArch64::BI__builtin_arm_sevl:
11382 case clang::AArch64::BI__sevl:
11383 HintID = 5;
11384 break;
11385 }
11386
11387 if (HintID != static_cast<unsigned>(-1)) {
11388 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11389 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11390 }
11391
11392 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11393 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11394 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11395 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11396 }
11397
11398 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11399 // Create call to __arm_sme_state and store the results to the two pointers.
11400 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11401 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11402 false),
11403 "__arm_sme_state"));
11404 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11405 "aarch64_pstate_sm_compatible");
11406 CI->setAttributes(Attrs);
11407 CI->setCallingConv(
11408 llvm::CallingConv::
11409 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11410 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11411 EmitPointerWithAlignment(E->getArg(0)));
11412 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11413 EmitPointerWithAlignment(E->getArg(1)));
11414 }
11415
11416 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11417 assert((getContext().getTypeSize(E->getType()) == 32) &&
11418 "rbit of unusual size!");
11419 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11420 return Builder.CreateCall(
11421 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11422 }
11423 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11424 assert((getContext().getTypeSize(E->getType()) == 64) &&
11425 "rbit of unusual size!");
11426 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11427 return Builder.CreateCall(
11428 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11429 }
11430
11431 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11432 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11433 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11434 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11435 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11436 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11437 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11438 return Res;
11439 }
11440
11441 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11442 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11443 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11444 "cls");
11445 }
11446 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11447 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11448 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11449 "cls");
11450 }
11451
11452 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11453 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11454 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11455 llvm::Type *Ty = Arg->getType();
11456 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11457 Arg, "frint32z");
11458 }
11459
11460 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11461 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11462 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11463 llvm::Type *Ty = Arg->getType();
11464 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11465 Arg, "frint64z");
11466 }
11467
11468 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11469 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11470 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11471 llvm::Type *Ty = Arg->getType();
11472 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11473 Arg, "frint32x");
11474 }
11475
11476 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11477 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11478 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11479 llvm::Type *Ty = Arg->getType();
11480 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11481 Arg, "frint64x");
11482 }
11483
11484 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11485 assert((getContext().getTypeSize(E->getType()) == 32) &&
11486 "__jcvt of unusual size!");
11487 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11488 return Builder.CreateCall(
11489 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11490 }
11491
11492 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11493 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11494 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11495 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11496 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11497 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11498
11499 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11500 // Load from the address via an LLVM intrinsic, receiving a
11501 // tuple of 8 i64 words, and store each one to ValPtr.
11502 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11503 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11504 llvm::Value *ToRet;
11505 for (size_t i = 0; i < 8; i++) {
11506 llvm::Value *ValOffsetPtr =
11507 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11508 Address Addr =
11509 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11510 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11511 }
11512 return ToRet;
11513 } else {
11514 // Load 8 i64 words from ValPtr, and store them to the address
11515 // via an LLVM intrinsic.
11516 SmallVector<llvm::Value *, 9> Args;
11517 Args.push_back(MemAddr);
11518 for (size_t i = 0; i < 8; i++) {
11519 llvm::Value *ValOffsetPtr =
11520 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11521 Address Addr =
11522 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11523 Args.push_back(Builder.CreateLoad(Addr));
11524 }
11525
11526 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11527 ? Intrinsic::aarch64_st64b
11528 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11529 ? Intrinsic::aarch64_st64bv
11530 : Intrinsic::aarch64_st64bv0);
11531 Function *F = CGM.getIntrinsic(Intr);
11532 return Builder.CreateCall(F, Args);
11533 }
11534 }
11535
11536 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11537 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11538
11539 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11540 ? Intrinsic::aarch64_rndr
11541 : Intrinsic::aarch64_rndrrs);
11542 Function *F = CGM.getIntrinsic(Intr);
11543 llvm::Value *Val = Builder.CreateCall(F);
11544 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11545 Value *Status = Builder.CreateExtractValue(Val, 1);
11546
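    // Both intrinsics return {random value, status}: store the value through
    // the pointer argument and return the status, zero-extended to i32.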
11547 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11548 Builder.CreateStore(RandomValue, MemAddress);
11549 Status = Builder.CreateZExt(Status, Int32Ty);
11550 return Status;
11551 }
11552
11553 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11554 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11555 const FunctionDecl *FD = E->getDirectCallee();
11556 Value *Ops[2];
11557 for (unsigned i = 0; i < 2; i++)
11558 Ops[i] = EmitScalarExpr(E->getArg(i));
11559 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11560 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11561 StringRef Name = FD->getName();
11562 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11563 }
11564
11565 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11566 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11567 getContext().getTypeSize(E->getType()) == 128) {
11568 Function *F =
11569 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11570 ? Intrinsic::aarch64_ldaxp
11571 : Intrinsic::aarch64_ldxp);
11572
11573 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11574 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11575
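    // The load-exclusive-pair intrinsic returns two i64 halves; zero-extend
    // them to i128 and recombine them, with element 1 forming the upper bits.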
11576 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11577 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11578 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11579 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11580 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11581
11582 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11583 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11584 Val = Builder.CreateOr(Val, Val1);
11585 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11586 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11587 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11588 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11589
11590 QualType Ty = E->getType();
11591 llvm::Type *RealResTy = ConvertType(Ty);
11592 llvm::Type *IntTy =
11593 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11594
11595 Function *F =
11596 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11597 ? Intrinsic::aarch64_ldaxr
11598 : Intrinsic::aarch64_ldxr,
11599 UnqualPtrTy);
11600 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11601 Val->addParamAttr(
11602 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11603
11604 if (RealResTy->isPointerTy())
11605 return Builder.CreateIntToPtr(Val, RealResTy);
11606
11607 llvm::Type *IntResTy = llvm::IntegerType::get(
11608 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11609 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11610 RealResTy);
11611 }
11612
11613 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11614 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11615 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11616 Function *F =
11617 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11618 ? Intrinsic::aarch64_stlxp
11619 : Intrinsic::aarch64_stxp);
11620 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11621
11622 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11623 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11624
11625 Tmp = Tmp.withElementType(STy);
11626 llvm::Value *Val = Builder.CreateLoad(Tmp);
11627
11628 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11629 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11630 Value *StPtr = EmitScalarExpr(E->getArg(1));
11631 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11632 }
11633
11634 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11635 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11636 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11637 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11638
11639 QualType Ty = E->getArg(0)->getType();
11640 llvm::Type *StoreTy =
11641 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11642
11643 if (StoreVal->getType()->isPointerTy())
11644 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11645 else {
11646 llvm::Type *IntTy = llvm::IntegerType::get(
11647 getLLVMContext(),
11648 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11649 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11650 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11651 }
11652
11653 Function *F =
11654 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11655 ? Intrinsic::aarch64_stlxr
11656 : Intrinsic::aarch64_stxr,
11657 StoreAddr->getType());
11658 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11659 CI->addParamAttr(
11660 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11661 return CI;
11662 }
11663
11664 if (BuiltinID == clang::AArch64::BI__getReg) {
11665 Expr::EvalResult Result;
11666 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11667 llvm_unreachable("Sema will ensure that the parameter is constant");
11668
11669 llvm::APSInt Value = Result.Val.getInt();
11670 LLVMContext &Context = CGM.getLLVMContext();
11671 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11672
11673 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11674 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11675 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11676
11677 llvm::Function *F =
11678 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11679 return Builder.CreateCall(F, Metadata);
11680 }
11681
11682 if (BuiltinID == clang::AArch64::BI__break) {
11683 Expr::EvalResult Result;
11684 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11685 llvm_unreachable("Sema will ensure that the parameter is constant");
11686
11687 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11688 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11689 }
11690
11691 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11692 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11693 return Builder.CreateCall(F);
11694 }
11695
11696 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11697 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11698 llvm::SyncScope::SingleThread);
11699
11700 // CRC32
11701 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11702 switch (BuiltinID) {
11703 case clang::AArch64::BI__builtin_arm_crc32b:
11704 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11705 case clang::AArch64::BI__builtin_arm_crc32cb:
11706 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11707 case clang::AArch64::BI__builtin_arm_crc32h:
11708 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11709 case clang::AArch64::BI__builtin_arm_crc32ch:
11710 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11711 case clang::AArch64::BI__builtin_arm_crc32w:
11712 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11713 case clang::AArch64::BI__builtin_arm_crc32cw:
11714 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11715 case clang::AArch64::BI__builtin_arm_crc32d:
11716 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11717 case clang::AArch64::BI__builtin_arm_crc32cd:
11718 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11719 }
11720
11721 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11722 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11723 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11724 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11725
11726 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11727 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11728
11729 return Builder.CreateCall(F, {Arg0, Arg1});
11730 }
11731
11732 // Memory Operations (MOPS)
11733 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11734 Value *Dst = EmitScalarExpr(E->getArg(0));
11735 Value *Val = EmitScalarExpr(E->getArg(1));
11736 Value *Size = EmitScalarExpr(E->getArg(2));
11737 Val = Builder.CreateTrunc(Val, Int8Ty);
11738 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11739 return Builder.CreateCall(
11740 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11741 }
11742
11743 // Memory Tagging Extensions (MTE) Intrinsics
11744 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11745 switch (BuiltinID) {
11746 case clang::AArch64::BI__builtin_arm_irg:
11747 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11748 case clang::AArch64::BI__builtin_arm_addg:
11749 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11750 case clang::AArch64::BI__builtin_arm_gmi:
11751 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11752 case clang::AArch64::BI__builtin_arm_ldg:
11753 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11754 case clang::AArch64::BI__builtin_arm_stg:
11755 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11756 case clang::AArch64::BI__builtin_arm_subp:
11757 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11758 }
11759
11760 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11761 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11762 Value *Pointer = EmitScalarExpr(E->getArg(0));
11763 Value *Mask = EmitScalarExpr(E->getArg(1));
11764
11765 Mask = Builder.CreateZExt(Mask, Int64Ty);
11766 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11767 {Pointer, Mask});
11768 }
11769 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11770 Value *Pointer = EmitScalarExpr(E->getArg(0));
11771 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11772
11773 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11774 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11775 {Pointer, TagOffset});
11776 }
11777 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11778 Value *Pointer = EmitScalarExpr(E->getArg(0));
11779 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11780
11781 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11782 return Builder.CreateCall(
11783 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11784 }
11785 // Although it is possible to supply a different return
11786 // address (first arg) to this intrinsic, for now we set
11787 // return address same as input address.
11788 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11789 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11790 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11791 {TagAddress, TagAddress});
11792 }
11793 // Although it is possible to supply a different tag (to set)
11794 // to this intrinsic (as first arg), for now we supply
11795 // the tag that is in input address arg (common use case).
11796 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11797 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11798 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11799 {TagAddress, TagAddress});
11800 }
11801 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11802 Value *PointerA = EmitScalarExpr(E->getArg(0));
11803 Value *PointerB = EmitScalarExpr(E->getArg(1));
11804 return Builder.CreateCall(
11805 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11806 }
11807 }
11808
11809 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11810 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11811 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11812 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11813 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11814 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11815 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11816 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11817
11818 SpecialRegisterAccessKind AccessKind = Write;
11819 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11820 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11821 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11822 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11823 AccessKind = VolatileRead;
11824
11825 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11826 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11827
11828 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11829 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11830
11831 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11832 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11833
11834 llvm::Type *ValueType;
11835 llvm::Type *RegisterType = Int64Ty;
11836 if (Is32Bit) {
11837 ValueType = Int32Ty;
11838 } else if (Is128Bit) {
11839 llvm::Type *Int128Ty =
11840 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11841 ValueType = Int128Ty;
11842 RegisterType = Int128Ty;
11843 } else if (IsPointerBuiltin) {
11844 ValueType = VoidPtrTy;
11845 } else {
11846 ValueType = Int64Ty;
11847 }
11848
11849 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11850 AccessKind);
11851 }
11852
11853 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11854 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11855 LLVMContext &Context = CGM.getLLVMContext();
11856
11857 unsigned SysReg =
11858 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11859
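    // Decode the packed system-register operand into the "op0:op1:CRn:CRm:op2"
    // string form expected by the read_register/write_register metadata
    // (op0 is always 2 or 3 here).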
11860 std::string SysRegStr;
11861 llvm::raw_string_ostream(SysRegStr) <<
11862 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11863 ((SysReg >> 11) & 7) << ":" <<
11864 ((SysReg >> 7) & 15) << ":" <<
11865 ((SysReg >> 3) & 15) << ":" <<
11866 ( SysReg & 7);
11867
11868 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11869 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11870 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11871
11872 llvm::Type *RegisterType = Int64Ty;
11873 llvm::Type *Types[] = { RegisterType };
11874
11875 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11876 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11877
11878 return Builder.CreateCall(F, Metadata);
11879 }
11880
11881 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11882 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11883
11884 return Builder.CreateCall(F, { Metadata, ArgValue });
11885 }
11886
11887 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11888 llvm::Function *F =
11889 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11890 return Builder.CreateCall(F);
11891 }
11892
11893 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11894 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11895 return Builder.CreateCall(F);
11896 }
11897
11898 if (BuiltinID == clang::AArch64::BI__mulh ||
11899 BuiltinID == clang::AArch64::BI__umulh) {
11900 llvm::Type *ResType = ConvertType(E->getType());
11901 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11902
11903 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11904 Value *LHS =
11905 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11906 Value *RHS =
11907 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11908
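    // Form the full 128-bit product and return its upper 64 bits, using an
    // arithmetic shift for the signed variant.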
11909 Value *MulResult, *HigherBits;
11910 if (IsSigned) {
11911 MulResult = Builder.CreateNSWMul(LHS, RHS);
11912 HigherBits = Builder.CreateAShr(MulResult, 64);
11913 } else {
11914 MulResult = Builder.CreateNUWMul(LHS, RHS);
11915 HigherBits = Builder.CreateLShr(MulResult, 64);
11916 }
11917 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11918
11919 return HigherBits;
11920 }
11921
11922 if (BuiltinID == AArch64::BI__writex18byte ||
11923 BuiltinID == AArch64::BI__writex18word ||
11924 BuiltinID == AArch64::BI__writex18dword ||
11925 BuiltinID == AArch64::BI__writex18qword) {
11926 // Process the args first
11927 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11928 Value *DataArg = EmitScalarExpr(E->getArg(1));
11929
11930 // Read x18 as i8*
11931 llvm::Value *X18 = readX18AsPtr(*this);
11932
11933 // Store val at x18 + offset
11934 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11935 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11936 StoreInst *Store =
11937 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11938 return Store;
11939 }
11940
11941 if (BuiltinID == AArch64::BI__readx18byte ||
11942 BuiltinID == AArch64::BI__readx18word ||
11943 BuiltinID == AArch64::BI__readx18dword ||
11944 BuiltinID == AArch64::BI__readx18qword) {
11945 // Process the args first
11946 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11947
11948 // Read x18 as i8*
11949 llvm::Value *X18 = readX18AsPtr(*this);
11950
11951 // Load x18 + offset
11952 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11953 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11954 llvm::Type *IntTy = ConvertType(E->getType());
11955 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11956 return Load;
11957 }
11958
11959 if (BuiltinID == AArch64::BI__addx18byte ||
11960 BuiltinID == AArch64::BI__addx18word ||
11961 BuiltinID == AArch64::BI__addx18dword ||
11962 BuiltinID == AArch64::BI__addx18qword ||
11963 BuiltinID == AArch64::BI__incx18byte ||
11964 BuiltinID == AArch64::BI__incx18word ||
11965 BuiltinID == AArch64::BI__incx18dword ||
11966 BuiltinID == AArch64::BI__incx18qword) {
11967 llvm::Type *IntTy;
11968 bool isIncrement;
11969 switch (BuiltinID) {
11970 case AArch64::BI__incx18byte:
11971 IntTy = Int8Ty;
11972 isIncrement = true;
11973 break;
11974 case AArch64::BI__incx18word:
11975 IntTy = Int16Ty;
11976 isIncrement = true;
11977 break;
11978 case AArch64::BI__incx18dword:
11979 IntTy = Int32Ty;
11980 isIncrement = true;
11981 break;
11982 case AArch64::BI__incx18qword:
11983 IntTy = Int64Ty;
11984 isIncrement = true;
11985 break;
11986 default:
11987 IntTy = ConvertType(E->getArg(1)->getType());
11988 isIncrement = false;
11989 break;
11990 }
11991 // Process the args first
11992 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11993 Value *ValToAdd =
11994 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11995
11996 // Read x18 as i8*
11997 llvm::Value *X18 = readX18AsPtr(*this);
11998
11999 // Load x18 + offset
12000 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
12001 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
12002 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
12003
12004 // Add values
12005 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
12006
12007 // Store val at x18 + offset
12008 StoreInst *Store =
12009 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
12010 return Store;
12011 }
12012
12013 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
12014 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
12015 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
12016 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
12017 Value *Arg = EmitScalarExpr(E->getArg(0));
12018 llvm::Type *RetTy = ConvertType(E->getType());
12019 return Builder.CreateBitCast(Arg, RetTy);
12020 }
12021
12022 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
12023 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
12024 BuiltinID == AArch64::BI_CountLeadingZeros ||
12025 BuiltinID == AArch64::BI_CountLeadingZeros64) {
12026 Value *Arg = EmitScalarExpr(E->getArg(0));
12027 llvm::Type *ArgType = Arg->getType();
12028
12029 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
12030 BuiltinID == AArch64::BI_CountLeadingOnes64)
12031 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
12032
12033 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
12034 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
12035
12036 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
12037 BuiltinID == AArch64::BI_CountLeadingZeros64)
12038 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12039 return Result;
12040 }
12041
12042 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
12043 BuiltinID == AArch64::BI_CountLeadingSigns64) {
12044 Value *Arg = EmitScalarExpr(E->getArg(0));
12045
12046 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
12047 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
12048 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
12049
12050 Value *Result = Builder.CreateCall(F, Arg, "cls");
12051 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
12052 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12053 return Result;
12054 }
12055
12056 if (BuiltinID == AArch64::BI_CountOneBits ||
12057 BuiltinID == AArch64::BI_CountOneBits64) {
12058 Value *ArgValue = EmitScalarExpr(E->getArg(0));
12059 llvm::Type *ArgType = ArgValue->getType();
12060 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
12061
12062 Value *Result = Builder.CreateCall(F, ArgValue);
12063 if (BuiltinID == AArch64::BI_CountOneBits64)
12064 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12065 return Result;
12066 }
12067
12068 if (BuiltinID == AArch64::BI__prefetch) {
12069 Value *Address = EmitScalarExpr(E->getArg(0));
12070 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
12071 Value *Locality = ConstantInt::get(Int32Ty, 3);
12072 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
12073 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
12074 return Builder.CreateCall(F, {Address, RW, Locality, Data});
12075 }
12076
12077 if (BuiltinID == AArch64::BI__hlt) {
12078 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
12079 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
12080
12081 // Return 0 for convenience, even though MSVC returns some other undefined
12082 // value.
12083 return ConstantInt::get(Builder.getInt32Ty(), 0);
12084 }
12085
12086 // Handle MSVC intrinsics before argument evaluation to prevent double
12087 // evaluation.
12088 if (std::optional<MSVCIntrin> MsvcIntId =
12089 translateAarch64ToMsvcIntrin(BuiltinID))
12090 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12091
12092 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
12093 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
12094 return P.first == BuiltinID;
12095 });
12096 if (It != end(NEONEquivalentIntrinsicMap))
12097 BuiltinID = It->second;
12098
12099 // Find out if any arguments are required to be integer constant
12100 // expressions.
12101 unsigned ICEArguments = 0;
12102 ASTContext::GetBuiltinTypeError Error;
12103 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12104 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12105
12106 llvm::SmallVector<Value*, 4> Ops;
12107 Address PtrOp0 = Address::invalid();
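  // Emit all arguments except the last here; the final argument is either
  // pushed later (for SISD builtins) or interpreted below as the NEON
  // type-flags immediate that selects the overload.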
12108 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
12109 if (i == 0) {
12110 switch (BuiltinID) {
12111 case NEON::BI__builtin_neon_vld1_v:
12112 case NEON::BI__builtin_neon_vld1q_v:
12113 case NEON::BI__builtin_neon_vld1_dup_v:
12114 case NEON::BI__builtin_neon_vld1q_dup_v:
12115 case NEON::BI__builtin_neon_vld1_lane_v:
12116 case NEON::BI__builtin_neon_vld1q_lane_v:
12117 case NEON::BI__builtin_neon_vst1_v:
12118 case NEON::BI__builtin_neon_vst1q_v:
12119 case NEON::BI__builtin_neon_vst1_lane_v:
12120 case NEON::BI__builtin_neon_vst1q_lane_v:
12121 case NEON::BI__builtin_neon_vldap1_lane_s64:
12122 case NEON::BI__builtin_neon_vldap1q_lane_s64:
12123 case NEON::BI__builtin_neon_vstl1_lane_s64:
12124 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12125 // Get the alignment for the argument in addition to the value;
12126 // we'll use it later.
12127 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
12128 Ops.push_back(PtrOp0.emitRawPointer(*this));
12129 continue;
12130 }
12131 }
12132 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
12133 }
12134
12135 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12136 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12137 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12138
12139 if (Builtin) {
12140 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12141 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12142 assert(Result && "SISD intrinsic should have been handled");
12143 return Result;
12144 }
12145
12146 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12147 NeonTypeFlags Type(0);
12148 if (std::optional<llvm::APSInt> Result =
12149 Arg->getIntegerConstantExpr(getContext()))
12150 // Determine the type of this overloaded NEON intrinsic.
12151 Type = NeonTypeFlags(Result->getZExtValue());
12152
12153 bool usgn = Type.isUnsigned();
12154 bool quad = Type.isQuad();
12155
12156 // Handle non-overloaded intrinsics first.
12157 switch (BuiltinID) {
12158 default: break;
12159 case NEON::BI__builtin_neon_vabsh_f16:
12160 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12161 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12162 case NEON::BI__builtin_neon_vaddq_p128: {
12163 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12164 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12165 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12166 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12167 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12168 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12169 return Builder.CreateBitCast(Ops[0], Int128Ty);
12170 }
12171 case NEON::BI__builtin_neon_vldrq_p128: {
12172 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12173 Value *Ptr = EmitScalarExpr(E->getArg(0));
12174 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12175 CharUnits::fromQuantity(16));
12176 }
12177 case NEON::BI__builtin_neon_vstrq_p128: {
12178 Value *Ptr = Ops[0];
12179 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12180 }
12181 case NEON::BI__builtin_neon_vcvts_f32_u32:
12182 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12183 usgn = true;
12184 [[fallthrough]];
12185 case NEON::BI__builtin_neon_vcvts_f32_s32:
12186 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12187 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12188 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12189 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12190 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12191 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12192 if (usgn)
12193 return Builder.CreateUIToFP(Ops[0], FTy);
12194 return Builder.CreateSIToFP(Ops[0], FTy);
12195 }
12196 case NEON::BI__builtin_neon_vcvth_f16_u16:
12197 case NEON::BI__builtin_neon_vcvth_f16_u32:
12198 case NEON::BI__builtin_neon_vcvth_f16_u64:
12199 usgn = true;
12200 [[fallthrough]];
12201 case NEON::BI__builtin_neon_vcvth_f16_s16:
12202 case NEON::BI__builtin_neon_vcvth_f16_s32:
12203 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12204 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12205 llvm::Type *FTy = HalfTy;
12206 llvm::Type *InTy;
12207 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12208 InTy = Int64Ty;
12209 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12210 InTy = Int32Ty;
12211 else
12212 InTy = Int16Ty;
12213 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12214 if (usgn)
12215 return Builder.CreateUIToFP(Ops[0], FTy);
12216 return Builder.CreateSIToFP(Ops[0], FTy);
12217 }
12218 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12219 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12220 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12221 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12222 case NEON::BI__builtin_neon_vcvth_u16_f16:
12223 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12224 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12225 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12226 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12227 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12228 unsigned Int;
12229 llvm::Type* InTy = Int32Ty;
12230 llvm::Type* FTy = HalfTy;
12231 llvm::Type *Tys[2] = {InTy, FTy};
12232 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12233 switch (BuiltinID) {
12234 default: llvm_unreachable("missing builtin ID in switch!");
12235 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12236 Int = Intrinsic::aarch64_neon_fcvtau; break;
12237 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12238 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12239 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12240 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12241 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12242 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12243 case NEON::BI__builtin_neon_vcvth_u16_f16:
12244 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12245 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12246 Int = Intrinsic::aarch64_neon_fcvtas; break;
12247 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12248 Int = Intrinsic::aarch64_neon_fcvtms; break;
12249 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12250 Int = Intrinsic::aarch64_neon_fcvtns; break;
12251 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12252 Int = Intrinsic::aarch64_neon_fcvtps; break;
12253 case NEON::BI__builtin_neon_vcvth_s16_f16:
12254 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12255 }
12256 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12257 return Builder.CreateTrunc(Ops[0], Int16Ty);
12258 }
12259 case NEON::BI__builtin_neon_vcaleh_f16:
12260 case NEON::BI__builtin_neon_vcalth_f16:
12261 case NEON::BI__builtin_neon_vcageh_f16:
12262 case NEON::BI__builtin_neon_vcagth_f16: {
12263 unsigned Int;
12264 llvm::Type* InTy = Int32Ty;
12265 llvm::Type* FTy = HalfTy;
12266 llvm::Type *Tys[2] = {InTy, FTy};
12267 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12268 switch (BuiltinID) {
12269 default: llvm_unreachable("missing builtin ID in switch!");
12270 case NEON::BI__builtin_neon_vcageh_f16:
12271 Int = Intrinsic::aarch64_neon_facge; break;
12272 case NEON::BI__builtin_neon_vcagth_f16:
12273 Int = Intrinsic::aarch64_neon_facgt; break;
12274 case NEON::BI__builtin_neon_vcaleh_f16:
12275 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12276 case NEON::BI__builtin_neon_vcalth_f16:
12277 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12278 }
12279 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12280 return Builder.CreateTrunc(Ops[0], Int16Ty);
12281 }
12282 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12283 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12284 unsigned Int;
12285 llvm::Type* InTy = Int32Ty;
12286 llvm::Type* FTy = HalfTy;
12287 llvm::Type *Tys[2] = {InTy, FTy};
12288 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12289 switch (BuiltinID) {
12290 default: llvm_unreachable("missing builtin ID in switch!");
12291 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12292 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12293 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12294 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12295 }
12296 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12297 return Builder.CreateTrunc(Ops[0], Int16Ty);
12298 }
12299 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12300 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12301 unsigned Int;
12302 llvm::Type* FTy = HalfTy;
12303 llvm::Type* InTy = Int32Ty;
12304 llvm::Type *Tys[2] = {FTy, InTy};
12305 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12306 switch (BuiltinID) {
12307 default: llvm_unreachable("missing builtin ID in switch!");
12308 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12309 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12310 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12311 break;
12312 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12313 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12314 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12315 break;
12316 }
12317 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12318 }
12319 case NEON::BI__builtin_neon_vpaddd_s64: {
12320 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12321 Value *Vec = EmitScalarExpr(E->getArg(0));
12322 // The vector is v2f64, so make sure it's bitcast to that.
12323 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12324 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12325 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12326 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12327 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12328 // Pairwise addition of a v2f64 into a scalar f64.
12329 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12330 }
12331 case NEON::BI__builtin_neon_vpaddd_f64: {
12332 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12333 Value *Vec = EmitScalarExpr(E->getArg(0));
12334 // The vector is v2f64, so make sure it's bitcast to that.
12335 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12336 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12337 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12338 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12339 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12340 // Pairwise addition of a v2f64 into a scalar f64.
12341 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12342 }
12343 case NEON::BI__builtin_neon_vpadds_f32: {
12344 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12345 Value *Vec = EmitScalarExpr(E->getArg(0));
12346 // The vector is v2f32, so make sure it's bitcast to that.
12347 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12348 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12349 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12350 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12351 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12352 // Pairwise addition of a v2f32 into a scalar f32.
12353 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12354 }
12355 case NEON::BI__builtin_neon_vceqzd_s64:
12356 case NEON::BI__builtin_neon_vceqzd_f64:
12357 case NEON::BI__builtin_neon_vceqzs_f32:
12358 case NEON::BI__builtin_neon_vceqzh_f16:
12359 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12360 return EmitAArch64CompareBuiltinExpr(
12361 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12362 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12363 case NEON::BI__builtin_neon_vcgezd_s64:
12364 case NEON::BI__builtin_neon_vcgezd_f64:
12365 case NEON::BI__builtin_neon_vcgezs_f32:
12366 case NEON::BI__builtin_neon_vcgezh_f16:
12367 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12368 return EmitAArch64CompareBuiltinExpr(
12369 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12370 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12371 case NEON::BI__builtin_neon_vclezd_s64:
12372 case NEON::BI__builtin_neon_vclezd_f64:
12373 case NEON::BI__builtin_neon_vclezs_f32:
12374 case NEON::BI__builtin_neon_vclezh_f16:
12375 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12376 return EmitAArch64CompareBuiltinExpr(
12377 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12378 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12379 case NEON::BI__builtin_neon_vcgtzd_s64:
12380 case NEON::BI__builtin_neon_vcgtzd_f64:
12381 case NEON::BI__builtin_neon_vcgtzs_f32:
12382 case NEON::BI__builtin_neon_vcgtzh_f16:
12383 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12384 return EmitAArch64CompareBuiltinExpr(
12385 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12386 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12387 case NEON::BI__builtin_neon_vcltzd_s64:
12388 case NEON::BI__builtin_neon_vcltzd_f64:
12389 case NEON::BI__builtin_neon_vcltzs_f32:
12390 case NEON::BI__builtin_neon_vcltzh_f16:
12391 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12392 return EmitAArch64CompareBuiltinExpr(
12393 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12394 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12395
12396 case NEON::BI__builtin_neon_vceqzd_u64: {
12397 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12398 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12399 Ops[0] =
12400 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12401 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12402 }
12403 case NEON::BI__builtin_neon_vceqd_f64:
12404 case NEON::BI__builtin_neon_vcled_f64:
12405 case NEON::BI__builtin_neon_vcltd_f64:
12406 case NEON::BI__builtin_neon_vcged_f64:
12407 case NEON::BI__builtin_neon_vcgtd_f64: {
12408 llvm::CmpInst::Predicate P;
12409 switch (BuiltinID) {
12410 default: llvm_unreachable("missing builtin ID in switch!");
12411 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12412 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12413 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12414 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12415 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12416 }
12417 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12418 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12419 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12420 if (P == llvm::FCmpInst::FCMP_OEQ)
12421 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12422 else
12423 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12424 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12425 }
12426 case NEON::BI__builtin_neon_vceqs_f32:
12427 case NEON::BI__builtin_neon_vcles_f32:
12428 case NEON::BI__builtin_neon_vclts_f32:
12429 case NEON::BI__builtin_neon_vcges_f32:
12430 case NEON::BI__builtin_neon_vcgts_f32: {
12431 llvm::CmpInst::Predicate P;
12432 switch (BuiltinID) {
12433 default: llvm_unreachable("missing builtin ID in switch!");
12434 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12435 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12436 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12437 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12438 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12439 }
12440 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12441 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12442 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12443 if (P == llvm::FCmpInst::FCMP_OEQ)
12444 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12445 else
12446 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12447 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12448 }
12449 case NEON::BI__builtin_neon_vceqh_f16:
12450 case NEON::BI__builtin_neon_vcleh_f16:
12451 case NEON::BI__builtin_neon_vclth_f16:
12452 case NEON::BI__builtin_neon_vcgeh_f16:
12453 case NEON::BI__builtin_neon_vcgth_f16: {
12454 llvm::CmpInst::Predicate P;
12455 switch (BuiltinID) {
12456 default: llvm_unreachable("missing builtin ID in switch!");
12457 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12458 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12459 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12460 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12461 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12462 }
12463 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12464 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12465 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12466 if (P == llvm::FCmpInst::FCMP_OEQ)
12467 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12468 else
12469 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12470 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12471 }
12472 case NEON::BI__builtin_neon_vceqd_s64:
12473 case NEON::BI__builtin_neon_vceqd_u64:
12474 case NEON::BI__builtin_neon_vcgtd_s64:
12475 case NEON::BI__builtin_neon_vcgtd_u64:
12476 case NEON::BI__builtin_neon_vcltd_s64:
12477 case NEON::BI__builtin_neon_vcltd_u64:
12478 case NEON::BI__builtin_neon_vcged_u64:
12479 case NEON::BI__builtin_neon_vcged_s64:
12480 case NEON::BI__builtin_neon_vcled_u64:
12481 case NEON::BI__builtin_neon_vcled_s64: {
12482 llvm::CmpInst::Predicate P;
12483 switch (BuiltinID) {
12484 default: llvm_unreachable("missing builtin ID in switch!");
12485 case NEON::BI__builtin_neon_vceqd_s64:
12486 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12487 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12488 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12489 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12490 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12491 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12492 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12493 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12494 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12495 }
12496 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12497 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12498 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12499 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12500 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12501 }
12502 case NEON::BI__builtin_neon_vtstd_s64:
12503 case NEON::BI__builtin_neon_vtstd_u64: {
12504 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12505 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12506 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12507 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12508 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12509 llvm::Constant::getNullValue(Int64Ty));
12510 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12511 }
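// vset_lane/vget_lane and the scalar vdup-from-lane builtins below are simple
// element accesses: they lower directly to insertelement/extractelement after
// bitcasting the operand to the fixed vector type matching the lane width.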
12512 case NEON::BI__builtin_neon_vset_lane_i8:
12513 case NEON::BI__builtin_neon_vset_lane_i16:
12514 case NEON::BI__builtin_neon_vset_lane_i32:
12515 case NEON::BI__builtin_neon_vset_lane_i64:
12516 case NEON::BI__builtin_neon_vset_lane_bf16:
12517 case NEON::BI__builtin_neon_vset_lane_f32:
12518 case NEON::BI__builtin_neon_vsetq_lane_i8:
12519 case NEON::BI__builtin_neon_vsetq_lane_i16:
12520 case NEON::BI__builtin_neon_vsetq_lane_i32:
12521 case NEON::BI__builtin_neon_vsetq_lane_i64:
12522 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12523 case NEON::BI__builtin_neon_vsetq_lane_f32:
12524 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12525 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12526 case NEON::BI__builtin_neon_vset_lane_f64:
12527 // The vector type needs a cast for the v1f64 variant.
12528 Ops[1] =
12529 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12530 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12531 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12532 case NEON::BI__builtin_neon_vsetq_lane_f64:
12533 // The vector type needs a cast for the v2f64 variant.
12534 Ops[1] =
12535 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12536 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12537 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12538
12539 case NEON::BI__builtin_neon_vget_lane_i8:
12540 case NEON::BI__builtin_neon_vdupb_lane_i8:
12541 Ops[0] =
12542 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12543 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12544 "vget_lane");
12545 case NEON::BI__builtin_neon_vgetq_lane_i8:
12546 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12547 Ops[0] =
12548 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12549 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12550 "vgetq_lane");
12551 case NEON::BI__builtin_neon_vget_lane_i16:
12552 case NEON::BI__builtin_neon_vduph_lane_i16:
12553 Ops[0] =
12554 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12555 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12556 "vget_lane");
12557 case NEON::BI__builtin_neon_vgetq_lane_i16:
12558 case NEON::BI__builtin_neon_vduph_laneq_i16:
12559 Ops[0] =
12560 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12561 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12562 "vgetq_lane");
12563 case NEON::BI__builtin_neon_vget_lane_i32:
12564 case NEON::BI__builtin_neon_vdups_lane_i32:
12565 Ops[0] =
12566 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12567 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12568 "vget_lane");
12569 case NEON::BI__builtin_neon_vdups_lane_f32:
12570 Ops[0] =
12571 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12572 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12573 "vdups_lane");
12574 case NEON::BI__builtin_neon_vgetq_lane_i32:
12575 case NEON::BI__builtin_neon_vdups_laneq_i32:
12576 Ops[0] =
12577 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12578 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12579 "vgetq_lane");
12580 case NEON::BI__builtin_neon_vget_lane_i64:
12581 case NEON::BI__builtin_neon_vdupd_lane_i64:
12582 Ops[0] =
12583 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12584 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12585 "vget_lane");
12586 case NEON::BI__builtin_neon_vdupd_lane_f64:
12587 Ops[0] =
12588 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12589 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12590 "vdupd_lane");
12591 case NEON::BI__builtin_neon_vgetq_lane_i64:
12592 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12593 Ops[0] =
12594 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12595 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12596 "vgetq_lane");
12597 case NEON::BI__builtin_neon_vget_lane_f32:
12598 Ops[0] =
12599 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12600 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12601 "vget_lane");
12602 case NEON::BI__builtin_neon_vget_lane_f64:
12603 Ops[0] =
12604 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12605 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12606 "vget_lane");
12607 case NEON::BI__builtin_neon_vgetq_lane_f32:
12608 case NEON::BI__builtin_neon_vdups_laneq_f32:
12609 Ops[0] =
12610 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12611 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12612 "vgetq_lane");
12613 case NEON::BI__builtin_neon_vgetq_lane_f64:
12614 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12615 Ops[0] =
12616 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12617 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12618 "vgetq_lane");
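// Scalar __fp16 arithmetic (vaddh/vsubh/vmulh/vdivh) needs no intrinsic; it
// maps straight onto the corresponding LLVM IR floating-point instruction.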
12619 case NEON::BI__builtin_neon_vaddh_f16:
12620 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12621 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12622 case NEON::BI__builtin_neon_vsubh_f16:
12623 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12624 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12625 case NEON::BI__builtin_neon_vmulh_f16:
12626 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12627 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12628 case NEON::BI__builtin_neon_vdivh_f16:
12629 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12630 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12631 case NEON::BI__builtin_neon_vfmah_f16:
12632 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12633 return emitCallMaybeConstrainedFPBuiltin(
12634 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12635 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12636 case NEON::BI__builtin_neon_vfmsh_f16: {
12637 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12638
12639 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12640 return emitCallMaybeConstrainedFPBuiltin(
12641 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12642 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12643 }
12644 case NEON::BI__builtin_neon_vaddd_s64:
12645 case NEON::BI__builtin_neon_vaddd_u64:
12646 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12647 case NEON::BI__builtin_neon_vsubd_s64:
12648 case NEON::BI__builtin_neon_vsubd_u64:
12649 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
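// vqdmlalh/vqdmlslh: there is no scalar i16 saturating-doubling-multiply
// intrinsic, so the i16 operands are widened into <4 x i16> vectors
// (vectorWrapScalar16), multiplied with sqdmull, lane 0 is extracted, and the
// result is accumulated with the scalar sqadd/sqsub intrinsic.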
12650 case NEON::BI__builtin_neon_vqdmlalh_s16:
12651 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12652 SmallVector<Value *, 2> ProductOps;
12653 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12654 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12655 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12656 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12657 ProductOps, "vqdmlXl");
12658 Constant *CI = ConstantInt::get(SizeTy, 0);
12659 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12660
12661 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12662 ? Intrinsic::aarch64_neon_sqadd
12663 : Intrinsic::aarch64_neon_sqsub;
12664 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12665 }
12666 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12667 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12668 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12669 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12670 Ops, "vqshlu_n");
12671 }
12672 case NEON::BI__builtin_neon_vqshld_n_u64:
12673 case NEON::BI__builtin_neon_vqshld_n_s64: {
12674 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12675 ? Intrinsic::aarch64_neon_uqshl
12676 : Intrinsic::aarch64_neon_sqshl;
12677 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12678 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12679 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12680 }
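// vrshrd_n: AArch64 exposes no scalar rounding-shift-right intrinsic, so the
// rounding shift *left* intrinsic (urshl/srshl) is used with a negated shift
// amount.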
12681 case NEON::BI__builtin_neon_vrshrd_n_u64:
12682 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12683 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12684 ? Intrinsic::aarch64_neon_urshl
12685 : Intrinsic::aarch64_neon_srshl;
12686 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12687 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12688 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12689 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12690 }
12691 case NEON::BI__builtin_neon_vrsrad_n_u64:
12692 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12693 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12694 ? Intrinsic::aarch64_neon_urshl
12695 : Intrinsic::aarch64_neon_srshl;
12696 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12697 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12698 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12699 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12700 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12701 }
12702 case NEON::BI__builtin_neon_vshld_n_s64:
12703 case NEON::BI__builtin_neon_vshld_n_u64: {
12704 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12705 return Builder.CreateShl(
12706 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12707 }
12708 case NEON::BI__builtin_neon_vshrd_n_s64: {
12709 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12710 return Builder.CreateAShr(
12711 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12712 Amt->getZExtValue())),
12713 "shrd_n");
12714 }
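// A shift amount of 64 cannot be expressed as an IR lshr (that would be
// poison), so the cases below fold it to the architecturally defined result.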
12715 case NEON::BI__builtin_neon_vshrd_n_u64: {
12716 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12717 uint64_t ShiftAmt = Amt->getZExtValue();
12718 // Right-shifting an unsigned value by its size yields 0.
12719 if (ShiftAmt == 64)
12720 return ConstantInt::get(Int64Ty, 0);
12721 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12722 "shrd_n");
12723 }
12724 case NEON::BI__builtin_neon_vsrad_n_s64: {
12725 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12726 Ops[1] = Builder.CreateAShr(
12727 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12728 Amt->getZExtValue())),
12729 "shrd_n");
12730 return Builder.CreateAdd(Ops[0], Ops[1]);
12731 }
12732 case NEON::BI__builtin_neon_vsrad_n_u64: {
12733 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12734 uint64_t ShiftAmt = Amt->getZExtValue();
12735 // Right-shifting an unsigned value by its size yields 0.
12736 // As Op + 0 = Op, return Ops[0] directly.
12737 if (ShiftAmt == 64)
12738 return Ops[0];
12739 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12740 "shrd_n");
12741 return Builder.CreateAdd(Ops[0], Ops[1]);
12742 }
12743 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12744 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12745 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12746 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12747 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12748 "lane");
12749 SmallVector<Value *, 2> ProductOps;
12750 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12751 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12752 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12753 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12754 ProductOps, "vqdmlXl");
12755 Constant *CI = ConstantInt::get(SizeTy, 0);
12756 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12757 Ops.pop_back();
12758
12759 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12760 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12761 ? Intrinsic::aarch64_neon_sqadd
12762 : Intrinsic::aarch64_neon_sqsub;
12763 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12764 }
12765 case NEON::BI__builtin_neon_vqdmlals_s32:
12766 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12767 SmallVector<Value *, 2> ProductOps;
12768 ProductOps.push_back(Ops[1]);
12769 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12770 Ops[1] =
12771 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12772 ProductOps, "vqdmlXl");
12773
12774 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12775 ? Intrinsic::aarch64_neon_sqadd
12776 : Intrinsic::aarch64_neon_sqsub;
12777 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12778 }
12779 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12780 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12781 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12782 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12783 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12784 "lane");
12785 SmallVector<Value *, 2> ProductOps;
12786 ProductOps.push_back(Ops[1]);
12787 ProductOps.push_back(Ops[2]);
12788 Ops[1] =
12789 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12790 ProductOps, "vqdmlXl");
12791 Ops.pop_back();
12792
12793 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12794 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12795 ? Intrinsic::aarch64_neon_sqadd
12796 : Intrinsic::aarch64_neon_sqsub;
12797 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12798 }
12799 case NEON::BI__builtin_neon_vget_lane_bf16:
12800 case NEON::BI__builtin_neon_vduph_lane_bf16:
12801 case NEON::BI__builtin_neon_vduph_lane_f16: {
12802 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12803 "vget_lane");
12804 }
12805 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12806 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12807 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12808 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12809 "vgetq_lane");
12810 }
12811
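// MSVC-style _InterlockedAdd/_InterlockedAdd64 return the *new* value, while
// atomicrmw add yields the old one, hence the extra add of Val after the RMW.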
12812 case clang::AArch64::BI_InterlockedAdd:
12813 case clang::AArch64::BI_InterlockedAdd64: {
12814 Address DestAddr = CheckAtomicAlignment(*this, E);
12815 Value *Val = EmitScalarExpr(E->getArg(1));
12816 AtomicRMWInst *RMWI =
12817 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12818 llvm::AtomicOrdering::SequentiallyConsistent);
12819 return Builder.CreateAdd(RMWI, Val);
12820 }
12821 }
12822
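// Everything past this point operates on full NEON vector types. The builtin
// is first looked up in the AArch64 SIMD map so that intrinsics shared with
// the common NEON path are emitted by EmitCommonNeonBuiltinExpr; only the
// remaining AArch64-specific cases fall through to the switch below.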
12823 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12824 llvm::Type *Ty = VTy;
12825 if (!Ty)
12826 return nullptr;
12827
12828 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12829 // defer to common code if it's been added to our special map.
12830 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12831 AArch64SIMDIntrinsicInfo);
12832
12833 if (Builtin)
12834 return EmitCommonNeonBuiltinExpr(
12835 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12836 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12837 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12838
12839 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12840 return V;
12841
12842 unsigned Int;
12843 switch (BuiltinID) {
12844 default: return nullptr;
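// vbsl has no dedicated intrinsic here; the bitwise select is expanded as
// (mask & a) | (~mask & b) on the integer view of the vectors and then cast
// back to the original element type.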
12845 case NEON::BI__builtin_neon_vbsl_v:
12846 case NEON::BI__builtin_neon_vbslq_v: {
12847 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12848 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12849 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12850 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12851
12852 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12853 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12854 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12855 return Builder.CreateBitCast(Ops[0], Ty);
12856 }
12857 case NEON::BI__builtin_neon_vfma_lane_v:
12858 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12859 // The ARM builtins (and instructions) have the addend as the first
12860 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12861 Value *Addend = Ops[0];
12862 Value *Multiplicand = Ops[1];
12863 Value *LaneSource = Ops[2];
12864 Ops[0] = Multiplicand;
12865 Ops[1] = LaneSource;
12866 Ops[2] = Addend;
12867
12868 // Now adjust things to handle the lane access.
12869 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12870 ? llvm::FixedVectorType::get(VTy->getElementType(),
12871 VTy->getNumElements() / 2)
12872 : VTy;
12873 llvm::Constant *cst = cast<Constant>(Ops[3]);
12874 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12875 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12876 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12877
12878 Ops.pop_back();
12879 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12880 : Intrinsic::fma;
12881 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12882 }
12883 case NEON::BI__builtin_neon_vfma_laneq_v: {
12884 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12885 // v1f64 fma should be mapped to Neon scalar f64 fma
12886 if (VTy && VTy->getElementType() == DoubleTy) {
12887 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12888 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12889 llvm::FixedVectorType *VTy =
12890 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12891 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12892 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12893 Value *Result;
12894 Result = emitCallMaybeConstrainedFPBuiltin(
12895 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12896 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12897 return Builder.CreateBitCast(Result, Ty);
12898 }
12899 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12900 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12901
12902 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12903 VTy->getNumElements() * 2);
12904 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12905 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12906 cast<ConstantInt>(Ops[3]));
12907 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12908
12909 return emitCallMaybeConstrainedFPBuiltin(
12910 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12911 {Ops[2], Ops[1], Ops[0]});
12912 }
12913 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12914 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12915 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12916
12917 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12918 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12919 return emitCallMaybeConstrainedFPBuiltin(
12920 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12921 {Ops[2], Ops[1], Ops[0]});
12922 }
12923 case NEON::BI__builtin_neon_vfmah_lane_f16:
12924 case NEON::BI__builtin_neon_vfmas_lane_f32:
12925 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12926 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12927 case NEON::BI__builtin_neon_vfmad_lane_f64:
12928 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12929 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12930 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12931 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12932 return emitCallMaybeConstrainedFPBuiltin(
12933 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12934 {Ops[1], Ops[2], Ops[0]});
12935 }
12936 case NEON::BI__builtin_neon_vmull_v:
12937 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12938 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12939 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12940 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12941 case NEON::BI__builtin_neon_vmax_v:
12942 case NEON::BI__builtin_neon_vmaxq_v:
12943 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12944 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12945 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12946 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12947 case NEON::BI__builtin_neon_vmaxh_f16: {
12948 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12949 Int = Intrinsic::aarch64_neon_fmax;
12950 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12951 }
12952 case NEON::BI__builtin_neon_vmin_v:
12953 case NEON::BI__builtin_neon_vminq_v:
12954 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12955 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12956 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12957 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12958 case NEON::BI__builtin_neon_vminh_f16: {
12959 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12960 Int = Intrinsic::aarch64_neon_fmin;
12961 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12962 }
12963 case NEON::BI__builtin_neon_vabd_v:
12964 case NEON::BI__builtin_neon_vabdq_v:
12965 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12966 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12967 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12968 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
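// vpadal (pairwise add and accumulate long) is emitted as a widening pairwise
// add (uaddlp/saddlp) on the narrow source followed by an ordinary vector add
// of the accumulator operand.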
12969 case NEON::BI__builtin_neon_vpadal_v:
12970 case NEON::BI__builtin_neon_vpadalq_v: {
12971 unsigned ArgElts = VTy->getNumElements();
12972 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12973 unsigned BitWidth = EltTy->getBitWidth();
12974 auto *ArgTy = llvm::FixedVectorType::get(
12975 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12976 llvm::Type* Tys[2] = { VTy, ArgTy };
12977 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12978 SmallVector<llvm::Value*, 1> TmpOps;
12979 TmpOps.push_back(Ops[1]);
12980 Function *F = CGM.getIntrinsic(Int, Tys);
12981 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12982 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12983 return Builder.CreateAdd(tmp, addend);
12984 }
12985 case NEON::BI__builtin_neon_vpmin_v:
12986 case NEON::BI__builtin_neon_vpminq_v:
12987 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12988 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12989 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12990 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12991 case NEON::BI__builtin_neon_vpmax_v:
12992 case NEON::BI__builtin_neon_vpmaxq_v:
12993 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12994 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12995 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12996 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12997 case NEON::BI__builtin_neon_vminnm_v:
12998 case NEON::BI__builtin_neon_vminnmq_v:
12999 Int = Intrinsic::aarch64_neon_fminnm;
13000 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
13001 case NEON::BI__builtin_neon_vminnmh_f16:
13002 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13003 Int = Intrinsic::aarch64_neon_fminnm;
13004 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
13005 case NEON::BI__builtin_neon_vmaxnm_v:
13006 case NEON::BI__builtin_neon_vmaxnmq_v:
13007 Int = Intrinsic::aarch64_neon_fmaxnm;
13008 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
13009 case NEON::BI__builtin_neon_vmaxnmh_f16:
13010 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13011 Int = Intrinsic::aarch64_neon_fmaxnm;
13012 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
13013 case NEON::BI__builtin_neon_vrecpss_f32: {
13014 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13015 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
13016 Ops, "vrecps");
13017 }
13018 case NEON::BI__builtin_neon_vrecpsd_f64:
13019 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13020 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
13021 Ops, "vrecps");
13022 case NEON::BI__builtin_neon_vrecpsh_f16:
13023 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13024 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
13025 Ops, "vrecps");
13026 case NEON::BI__builtin_neon_vqshrun_n_v:
13027 Int = Intrinsic::aarch64_neon_sqshrun;
13028 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
13029 case NEON::BI__builtin_neon_vqrshrun_n_v:
13030 Int = Intrinsic::aarch64_neon_sqrshrun;
13031 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
13032 case NEON::BI__builtin_neon_vqshrn_n_v:
13033 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
13034 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
13035 case NEON::BI__builtin_neon_vrshrn_n_v:
13036 Int = Intrinsic::aarch64_neon_rshrn;
13037 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
13038 case NEON::BI__builtin_neon_vqrshrn_n_v:
13039 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
13040 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
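// Most of the vrnd* family maps onto the generic LLVM rounding intrinsics
// (round, floor, ceil, rint, trunc, roundeven, nearbyint), switching to the
// experimental_constrained_* forms in strict-FP mode; the frint32/frint64
// variants use dedicated AArch64 intrinsics instead.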
13041 case NEON::BI__builtin_neon_vrndah_f16: {
13042 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13043 Int = Builder.getIsFPConstrained()
13044 ? Intrinsic::experimental_constrained_round
13045 : Intrinsic::round;
13046 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
13047 }
13048 case NEON::BI__builtin_neon_vrnda_v:
13049 case NEON::BI__builtin_neon_vrndaq_v: {
13050 Int = Builder.getIsFPConstrained()
13051 ? Intrinsic::experimental_constrained_round
13052 : Intrinsic::round;
13053 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
13054 }
13055 case NEON::BI__builtin_neon_vrndih_f16: {
13056 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13057 Int = Builder.getIsFPConstrained()
13058 ? Intrinsic::experimental_constrained_nearbyint
13059 : Intrinsic::nearbyint;
13060 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
13061 }
13062 case NEON::BI__builtin_neon_vrndmh_f16: {
13063 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13064 Int = Builder.getIsFPConstrained()
13065 ? Intrinsic::experimental_constrained_floor
13066 : Intrinsic::floor;
13067 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
13068 }
13069 case NEON::BI__builtin_neon_vrndm_v:
13070 case NEON::BI__builtin_neon_vrndmq_v: {
13071 Int = Builder.getIsFPConstrained()
13072 ? Intrinsic::experimental_constrained_floor
13073 : Intrinsic::floor;
13074 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
13075 }
13076 case NEON::BI__builtin_neon_vrndnh_f16: {
13077 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13078 Int = Builder.getIsFPConstrained()
13079 ? Intrinsic::experimental_constrained_roundeven
13080 : Intrinsic::roundeven;
13081 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
13082 }
13083 case NEON::BI__builtin_neon_vrndn_v:
13084 case NEON::BI__builtin_neon_vrndnq_v: {
13085 Int = Builder.getIsFPConstrained()
13086 ? Intrinsic::experimental_constrained_roundeven
13087 : Intrinsic::roundeven;
13088 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
13089 }
13090 case NEON::BI__builtin_neon_vrndns_f32: {
13091 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13092 Int = Builder.getIsFPConstrained()
13093 ? Intrinsic::experimental_constrained_roundeven
13094 : Intrinsic::roundeven;
13095 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
13096 }
13097 case NEON::BI__builtin_neon_vrndph_f16: {
13098 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13099 Int = Builder.getIsFPConstrained()
13100 ? Intrinsic::experimental_constrained_ceil
13101 : Intrinsic::ceil;
13102 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
13103 }
13104 case NEON::BI__builtin_neon_vrndp_v:
13105 case NEON::BI__builtin_neon_vrndpq_v: {
13106 Int = Builder.getIsFPConstrained()
13107 ? Intrinsic::experimental_constrained_ceil
13108 : Intrinsic::ceil;
13109 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13110 }
13111 case NEON::BI__builtin_neon_vrndxh_f16: {
13112 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13113 Int = Builder.getIsFPConstrained()
13114 ? Intrinsic::experimental_constrained_rint
13115 : Intrinsic::rint;
13116 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13117 }
13118 case NEON::BI__builtin_neon_vrndx_v:
13119 case NEON::BI__builtin_neon_vrndxq_v: {
13120 Int = Builder.getIsFPConstrained()
13121 ? Intrinsic::experimental_constrained_rint
13122 : Intrinsic::rint;
13123 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13124 }
13125 case NEON::BI__builtin_neon_vrndh_f16: {
13126 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13127 Int = Builder.getIsFPConstrained()
13128 ? Intrinsic::experimental_constrained_trunc
13129 : Intrinsic::trunc;
13130 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13131 }
13132 case NEON::BI__builtin_neon_vrnd32x_f32:
13133 case NEON::BI__builtin_neon_vrnd32xq_f32:
13134 case NEON::BI__builtin_neon_vrnd32x_f64:
13135 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13136 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13137 Int = Intrinsic::aarch64_neon_frint32x;
13138 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13139 }
13140 case NEON::BI__builtin_neon_vrnd32z_f32:
13141 case NEON::BI__builtin_neon_vrnd32zq_f32:
13142 case NEON::BI__builtin_neon_vrnd32z_f64:
13143 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13144 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13145 Int = Intrinsic::aarch64_neon_frint32z;
13146 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13147 }
13148 case NEON::BI__builtin_neon_vrnd64x_f32:
13149 case NEON::BI__builtin_neon_vrnd64xq_f32:
13150 case NEON::BI__builtin_neon_vrnd64x_f64:
13151 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13152 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13153 Int = Intrinsic::aarch64_neon_frint64x;
13154 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13155 }
13156 case NEON::BI__builtin_neon_vrnd64z_f32:
13157 case NEON::BI__builtin_neon_vrnd64zq_f32:
13158 case NEON::BI__builtin_neon_vrnd64z_f64:
13159 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13160 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13161 Int = Intrinsic::aarch64_neon_frint64z;
13162 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13163 }
13164 case NEON::BI__builtin_neon_vrnd_v:
13165 case NEON::BI__builtin_neon_vrndq_v: {
13166 Int = Builder.getIsFPConstrained()
13167 ? Intrinsic::experimental_constrained_trunc
13168 : Intrinsic::trunc;
13169 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13170 }
13171 case NEON::BI__builtin_neon_vcvt_f64_v:
13172 case NEON::BI__builtin_neon_vcvtq_f64_v:
13173 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13174 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13175 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13176 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13177 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13178 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13179 "unexpected vcvt_f64_f32 builtin");
13180 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13181 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13182
13183 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13184 }
13185 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13186 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13187 "unexpected vcvt_f32_f64 builtin");
13188 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13189 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13190
13191 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13192 }
13193 case NEON::BI__builtin_neon_vcvt_s32_v:
13194 case NEON::BI__builtin_neon_vcvt_u32_v:
13195 case NEON::BI__builtin_neon_vcvt_s64_v:
13196 case NEON::BI__builtin_neon_vcvt_u64_v:
13197 case NEON::BI__builtin_neon_vcvt_s16_f16:
13198 case NEON::BI__builtin_neon_vcvt_u16_f16:
13199 case NEON::BI__builtin_neon_vcvtq_s32_v:
13200 case NEON::BI__builtin_neon_vcvtq_u32_v:
13201 case NEON::BI__builtin_neon_vcvtq_s64_v:
13202 case NEON::BI__builtin_neon_vcvtq_u64_v:
13203 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13204 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13205 Int =
13206 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13207 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13208 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13209 }
13210 case NEON::BI__builtin_neon_vcvta_s16_f16:
13211 case NEON::BI__builtin_neon_vcvta_u16_f16:
13212 case NEON::BI__builtin_neon_vcvta_s32_v:
13213 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13214 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13215 case NEON::BI__builtin_neon_vcvta_u32_v:
13216 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13217 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13218 case NEON::BI__builtin_neon_vcvta_s64_v:
13219 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13220 case NEON::BI__builtin_neon_vcvta_u64_v:
13221 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13222 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13223 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13224 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13225 }
13226 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13227 case NEON::BI__builtin_neon_vcvtm_s32_v:
13228 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13229 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13230 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13231 case NEON::BI__builtin_neon_vcvtm_u32_v:
13232 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13233 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13234 case NEON::BI__builtin_neon_vcvtm_s64_v:
13235 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13236 case NEON::BI__builtin_neon_vcvtm_u64_v:
13237 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13238 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13239 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13240 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13241 }
13242 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13243 case NEON::BI__builtin_neon_vcvtn_s32_v:
13244 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13245 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13246 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13247 case NEON::BI__builtin_neon_vcvtn_u32_v:
13248 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13249 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13250 case NEON::BI__builtin_neon_vcvtn_s64_v:
13251 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13252 case NEON::BI__builtin_neon_vcvtn_u64_v:
13253 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13254 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13255 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13256 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13257 }
13258 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13259 case NEON::BI__builtin_neon_vcvtp_s32_v:
13260 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13261 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13262 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13263 case NEON::BI__builtin_neon_vcvtp_u32_v:
13264 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13265 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13266 case NEON::BI__builtin_neon_vcvtp_s64_v:
13267 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13268 case NEON::BI__builtin_neon_vcvtp_u64_v:
13269 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13270 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13271 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13272 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13273 }
13274 case NEON::BI__builtin_neon_vmulx_v:
13275 case NEON::BI__builtin_neon_vmulxq_v: {
13276 Int = Intrinsic::aarch64_neon_fmulx;
13277 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13278 }
13279 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13280 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13281 // vmulx_lane should be mapped to Neon scalar mulx after
13282 // extracting the scalar element
13283 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13284 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13285 Ops.pop_back();
13286 Int = Intrinsic::aarch64_neon_fmulx;
13287 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13288 }
13289 case NEON::BI__builtin_neon_vmul_lane_v:
13290 case NEON::BI__builtin_neon_vmul_laneq_v: {
13291 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13292 bool Quad = false;
13293 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13294 Quad = true;
13295 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13296 llvm::FixedVectorType *VTy =
13297 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13298 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13299 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13300 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13301 return Builder.CreateBitCast(Result, Ty);
13302 }
13303 case NEON::BI__builtin_neon_vnegd_s64:
13304 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13305 case NEON::BI__builtin_neon_vnegh_f16:
13306 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13307 case NEON::BI__builtin_neon_vpmaxnm_v:
13308 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13309 Int = Intrinsic::aarch64_neon_fmaxnmp;
13310 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13311 }
13312 case NEON::BI__builtin_neon_vpminnm_v:
13313 case NEON::BI__builtin_neon_vpminnmq_v: {
13314 Int = Intrinsic::aarch64_neon_fminnmp;
13315 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13316 }
13317 case NEON::BI__builtin_neon_vsqrth_f16: {
13318 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13319 Int = Builder.getIsFPConstrained()
13320 ? Intrinsic::experimental_constrained_sqrt
13321 : Intrinsic::sqrt;
13322 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13323 }
13324 case NEON::BI__builtin_neon_vsqrt_v:
13325 case NEON::BI__builtin_neon_vsqrtq_v: {
13326 Int = Builder.getIsFPConstrained()
13327 ? Intrinsic::experimental_constrained_sqrt
13328 : Intrinsic::sqrt;
13329 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13330 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13331 }
13332 case NEON::BI__builtin_neon_vrbit_v:
13333 case NEON::BI__builtin_neon_vrbitq_v: {
13334 Int = Intrinsic::bitreverse;
13335 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13336 }
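// Across-vector reductions (vaddv/vmaxv/vminv, signed and unsigned): the
// AArch64 reduction intrinsics are declared with an i32 result for the i8/i16
// element types, so the call result is truncated back to the element width.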
13337 case NEON::BI__builtin_neon_vaddv_u8:
13338 // FIXME: These are handled by the AArch64 scalar code.
13339 usgn = true;
13340 [[fallthrough]];
13341 case NEON::BI__builtin_neon_vaddv_s8: {
13342 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13343 Ty = Int32Ty;
13344 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13345 llvm::Type *Tys[2] = { Ty, VTy };
13346 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13347 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13348 return Builder.CreateTrunc(Ops[0], Int8Ty);
13349 }
13350 case NEON::BI__builtin_neon_vaddv_u16:
13351 usgn = true;
13352 [[fallthrough]];
13353 case NEON::BI__builtin_neon_vaddv_s16: {
13354 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13355 Ty = Int32Ty;
13356 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13357 llvm::Type *Tys[2] = { Ty, VTy };
13358 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13360 return Builder.CreateTrunc(Ops[0], Int16Ty);
13361 }
13362 case NEON::BI__builtin_neon_vaddvq_u8:
13363 usgn = true;
13364 [[fallthrough]];
13365 case NEON::BI__builtin_neon_vaddvq_s8: {
13366 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13367 Ty = Int32Ty;
13368 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13369 llvm::Type *Tys[2] = { Ty, VTy };
13370 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13371 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13372 return Builder.CreateTrunc(Ops[0], Int8Ty);
13373 }
13374 case NEON::BI__builtin_neon_vaddvq_u16:
13375 usgn = true;
13376 [[fallthrough]];
13377 case NEON::BI__builtin_neon_vaddvq_s16: {
13378 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13379 Ty = Int32Ty;
13380 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13381 llvm::Type *Tys[2] = { Ty, VTy };
13382 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13383 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13384 return Builder.CreateTrunc(Ops[0], Int16Ty);
13385 }
13386 case NEON::BI__builtin_neon_vmaxv_u8: {
13387 Int = Intrinsic::aarch64_neon_umaxv;
13388 Ty = Int32Ty;
13389 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13390 llvm::Type *Tys[2] = { Ty, VTy };
13391 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13392 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13393 return Builder.CreateTrunc(Ops[0], Int8Ty);
13394 }
13395 case NEON::BI__builtin_neon_vmaxv_u16: {
13396 Int = Intrinsic::aarch64_neon_umaxv;
13397 Ty = Int32Ty;
13398 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13399 llvm::Type *Tys[2] = { Ty, VTy };
13400 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13401 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13402 return Builder.CreateTrunc(Ops[0], Int16Ty);
13403 }
13404 case NEON::BI__builtin_neon_vmaxvq_u8: {
13405 Int = Intrinsic::aarch64_neon_umaxv;
13406 Ty = Int32Ty;
13407 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13408 llvm::Type *Tys[2] = { Ty, VTy };
13409 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13410 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13411 return Builder.CreateTrunc(Ops[0], Int8Ty);
13412 }
13413 case NEON::BI__builtin_neon_vmaxvq_u16: {
13414 Int = Intrinsic::aarch64_neon_umaxv;
13415 Ty = Int32Ty;
13416 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13417 llvm::Type *Tys[2] = { Ty, VTy };
13418 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13419 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13420 return Builder.CreateTrunc(Ops[0], Int16Ty);
13421 }
13422 case NEON::BI__builtin_neon_vmaxv_s8: {
13423 Int = Intrinsic::aarch64_neon_smaxv;
13424 Ty = Int32Ty;
13425 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13426 llvm::Type *Tys[2] = { Ty, VTy };
13427 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13428 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13429 return Builder.CreateTrunc(Ops[0], Int8Ty);
13430 }
13431 case NEON::BI__builtin_neon_vmaxv_s16: {
13432 Int = Intrinsic::aarch64_neon_smaxv;
13433 Ty = Int32Ty;
13434 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13435 llvm::Type *Tys[2] = { Ty, VTy };
13436 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13437 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13438 return Builder.CreateTrunc(Ops[0], Int16Ty);
13439 }
13440 case NEON::BI__builtin_neon_vmaxvq_s8: {
13441 Int = Intrinsic::aarch64_neon_smaxv;
13442 Ty = Int32Ty;
13443 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13444 llvm::Type *Tys[2] = { Ty, VTy };
13445 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13446 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13447 return Builder.CreateTrunc(Ops[0], Int8Ty);
13448 }
13449 case NEON::BI__builtin_neon_vmaxvq_s16: {
13450 Int = Intrinsic::aarch64_neon_smaxv;
13451 Ty = Int32Ty;
13452 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13453 llvm::Type *Tys[2] = { Ty, VTy };
13454 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13455 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13456 return Builder.CreateTrunc(Ops[0], Int16Ty);
13457 }
13458 case NEON::BI__builtin_neon_vmaxv_f16: {
13459 Int = Intrinsic::aarch64_neon_fmaxv;
13460 Ty = HalfTy;
13461 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13462 llvm::Type *Tys[2] = { Ty, VTy };
13463 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13464 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13465 return Builder.CreateTrunc(Ops[0], HalfTy);
13466 }
13467 case NEON::BI__builtin_neon_vmaxvq_f16: {
13468 Int = Intrinsic::aarch64_neon_fmaxv;
13469 Ty = HalfTy;
13470 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13471 llvm::Type *Tys[2] = { Ty, VTy };
13472 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13473 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13474 return Builder.CreateTrunc(Ops[0], HalfTy);
13475 }
13476 case NEON::BI__builtin_neon_vminv_u8: {
13477 Int = Intrinsic::aarch64_neon_uminv;
13478 Ty = Int32Ty;
13479 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13480 llvm::Type *Tys[2] = { Ty, VTy };
13481 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13482 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13483 return Builder.CreateTrunc(Ops[0], Int8Ty);
13484 }
13485 case NEON::BI__builtin_neon_vminv_u16: {
13486 Int = Intrinsic::aarch64_neon_uminv;
13487 Ty = Int32Ty;
13488 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13489 llvm::Type *Tys[2] = { Ty, VTy };
13490 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13491 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13492 return Builder.CreateTrunc(Ops[0], Int16Ty);
13493 }
13494 case NEON::BI__builtin_neon_vminvq_u8: {
13495 Int = Intrinsic::aarch64_neon_uminv;
13496 Ty = Int32Ty;
13497 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13498 llvm::Type *Tys[2] = { Ty, VTy };
13499 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13500 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13501 return Builder.CreateTrunc(Ops[0], Int8Ty);
13502 }
13503 case NEON::BI__builtin_neon_vminvq_u16: {
13504 Int = Intrinsic::aarch64_neon_uminv;
13505 Ty = Int32Ty;
13506 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13507 llvm::Type *Tys[2] = { Ty, VTy };
13508 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13509 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13510 return Builder.CreateTrunc(Ops[0], Int16Ty);
13511 }
13512 case NEON::BI__builtin_neon_vminv_s8: {
13513 Int = Intrinsic::aarch64_neon_sminv;
13514 Ty = Int32Ty;
13515 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13516 llvm::Type *Tys[2] = { Ty, VTy };
13517 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13518 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13519 return Builder.CreateTrunc(Ops[0], Int8Ty);
13520 }
13521 case NEON::BI__builtin_neon_vminv_s16: {
13522 Int = Intrinsic::aarch64_neon_sminv;
13523 Ty = Int32Ty;
13524 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13525 llvm::Type *Tys[2] = { Ty, VTy };
13526 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13527 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13528 return Builder.CreateTrunc(Ops[0], Int16Ty);
13529 }
13530 case NEON::BI__builtin_neon_vminvq_s8: {
13531 Int = Intrinsic::aarch64_neon_sminv;
13532 Ty = Int32Ty;
13533 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13534 llvm::Type *Tys[2] = { Ty, VTy };
13535 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13536 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13537 return Builder.CreateTrunc(Ops[0], Int8Ty);
13538 }
13539 case NEON::BI__builtin_neon_vminvq_s16: {
13540 Int = Intrinsic::aarch64_neon_sminv;
13541 Ty = Int32Ty;
13542 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13543 llvm::Type *Tys[2] = { Ty, VTy };
13544 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13545 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13546 return Builder.CreateTrunc(Ops[0], Int16Ty);
13547 }
13548 case NEON::BI__builtin_neon_vminv_f16: {
13549 Int = Intrinsic::aarch64_neon_fminv;
13550 Ty = HalfTy;
13551 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13552 llvm::Type *Tys[2] = { Ty, VTy };
13553 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13554 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13555 return Builder.CreateTrunc(Ops[0], HalfTy);
13556 }
13557 case NEON::BI__builtin_neon_vminvq_f16: {
13558 Int = Intrinsic::aarch64_neon_fminv;
13559 Ty = HalfTy;
13560 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13561 llvm::Type *Tys[2] = { Ty, VTy };
13562 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13563 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13564 return Builder.CreateTrunc(Ops[0], HalfTy);
13565 }
13566 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13567 Int = Intrinsic::aarch64_neon_fmaxnmv;
13568 Ty = HalfTy;
13569 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13570 llvm::Type *Tys[2] = { Ty, VTy };
13571 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13572 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13573 return Builder.CreateTrunc(Ops[0], HalfTy);
13574 }
13575 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13576 Int = Intrinsic::aarch64_neon_fmaxnmv;
13577 Ty = HalfTy;
13578 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13579 llvm::Type *Tys[2] = { Ty, VTy };
13580 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13581 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13582 return Builder.CreateTrunc(Ops[0], HalfTy);
13583 }
13584 case NEON::BI__builtin_neon_vminnmv_f16: {
13585 Int = Intrinsic::aarch64_neon_fminnmv;
13586 Ty = HalfTy;
13587 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13588 llvm::Type *Tys[2] = { Ty, VTy };
13589 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13590 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13591 return Builder.CreateTrunc(Ops[0], HalfTy);
13592 }
13593 case NEON::BI__builtin_neon_vminnmvq_f16: {
13594 Int = Intrinsic::aarch64_neon_fminnmv;
13595 Ty = HalfTy;
13596 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13597 llvm::Type *Tys[2] = { Ty, VTy };
13598 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13599 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13600 return Builder.CreateTrunc(Ops[0], HalfTy);
13601 }
13602 case NEON::BI__builtin_neon_vmul_n_f64: {
13603 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13604 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13605 return Builder.CreateFMul(Ops[0], RHS);
13606 }
13607 case NEON::BI__builtin_neon_vaddlv_u8: {
13608 Int = Intrinsic::aarch64_neon_uaddlv;
13609 Ty = Int32Ty;
13610 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13611 llvm::Type *Tys[2] = { Ty, VTy };
13612 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13613 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13614 return Builder.CreateTrunc(Ops[0], Int16Ty);
13615 }
13616 case NEON::BI__builtin_neon_vaddlv_u16: {
13617 Int = Intrinsic::aarch64_neon_uaddlv;
13618 Ty = Int32Ty;
13619 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13620 llvm::Type *Tys[2] = { Ty, VTy };
13621 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13622 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13623 }
13624 case NEON::BI__builtin_neon_vaddlvq_u8: {
13625 Int = Intrinsic::aarch64_neon_uaddlv;
13626 Ty = Int32Ty;
13627 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13628 llvm::Type *Tys[2] = { Ty, VTy };
13629 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13630 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13631 return Builder.CreateTrunc(Ops[0], Int16Ty);
13632 }
13633 case NEON::BI__builtin_neon_vaddlvq_u16: {
13634 Int = Intrinsic::aarch64_neon_uaddlv;
13635 Ty = Int32Ty;
13636 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13637 llvm::Type *Tys[2] = { Ty, VTy };
13638 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13639 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13640 }
13641 case NEON::BI__builtin_neon_vaddlv_s8: {
13642 Int = Intrinsic::aarch64_neon_saddlv;
13643 Ty = Int32Ty;
13644 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13645 llvm::Type *Tys[2] = { Ty, VTy };
13646 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13647 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13648 return Builder.CreateTrunc(Ops[0], Int16Ty);
13649 }
13650 case NEON::BI__builtin_neon_vaddlv_s16: {
13651 Int = Intrinsic::aarch64_neon_saddlv;
13652 Ty = Int32Ty;
13653 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13654 llvm::Type *Tys[2] = { Ty, VTy };
13655 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13656 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13657 }
13658 case NEON::BI__builtin_neon_vaddlvq_s8: {
13659 Int = Intrinsic::aarch64_neon_saddlv;
13660 Ty = Int32Ty;
13661 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13662 llvm::Type *Tys[2] = { Ty, VTy };
13663 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13664 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13665 return Builder.CreateTrunc(Ops[0], Int16Ty);
13666 }
13667 case NEON::BI__builtin_neon_vaddlvq_s16: {
13668 Int = Intrinsic::aarch64_neon_saddlv;
13669 Ty = Int32Ty;
13670 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13671 llvm::Type *Tys[2] = { Ty, VTy };
13672 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13673 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13674 }
13675 case NEON::BI__builtin_neon_vsri_n_v:
13676 case NEON::BI__builtin_neon_vsriq_n_v: {
13677 Int = Intrinsic::aarch64_neon_vsri;
13678 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13679 return EmitNeonCall(Intrin, Ops, "vsri_n");
13680 }
13681 case NEON::BI__builtin_neon_vsli_n_v:
13682 case NEON::BI__builtin_neon_vsliq_n_v: {
13683 Int = Intrinsic::aarch64_neon_vsli;
13684 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13685 return EmitNeonCall(Intrin, Ops, "vsli_n");
13686 }
13687 case NEON::BI__builtin_neon_vsra_n_v:
13688 case NEON::BI__builtin_neon_vsraq_n_v:
13689 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13690 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13691 return Builder.CreateAdd(Ops[0], Ops[1]);
13692 case NEON::BI__builtin_neon_vrsra_n_v:
13693 case NEON::BI__builtin_neon_vrsraq_n_v: {
13694 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13695 SmallVector<llvm::Value*, 2> TmpOps;
13696 TmpOps.push_back(Ops[1]);
13697 TmpOps.push_back(Ops[2]);
13698 Function* F = CGM.getIntrinsic(Int, Ty);
13699 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13700 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13701 return Builder.CreateAdd(Ops[0], tmp);
13702 }
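// vld1/vst1 and their lane variants are plain aligned loads and stores plus an
// insert/extractelement; the vldap1/vstl1 lane forms additionally mark the
// scalar memory access as acquire/release atomic.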
13703 case NEON::BI__builtin_neon_vld1_v:
13704 case NEON::BI__builtin_neon_vld1q_v: {
13705 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13706 }
13707 case NEON::BI__builtin_neon_vst1_v:
13708 case NEON::BI__builtin_neon_vst1q_v:
13709 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13710 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13711 case NEON::BI__builtin_neon_vld1_lane_v:
13712 case NEON::BI__builtin_neon_vld1q_lane_v: {
13713 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13714 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13715 PtrOp0.getAlignment());
13716 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13717 }
13718 case NEON::BI__builtin_neon_vldap1_lane_s64:
13719 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13720 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13721 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13722 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13723 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13724 Ops[0] = LI;
13725 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13726 }
13727 case NEON::BI__builtin_neon_vld1_dup_v:
13728 case NEON::BI__builtin_neon_vld1q_dup_v: {
13729 Value *V = PoisonValue::get(Ty);
13730 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13731 PtrOp0.getAlignment());
13732 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13733 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13734 return EmitNeonSplat(Ops[0], CI);
13735 }
13736 case NEON::BI__builtin_neon_vst1_lane_v:
13737 case NEON::BI__builtin_neon_vst1q_lane_v:
13738 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13739 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13740 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13741 case NEON::BI__builtin_neon_vstl1_lane_s64:
13742 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13743 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13744 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13745 llvm::StoreInst *SI =
13746 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13747 SI->setAtomic(llvm::AtomicOrdering::Release);
13748 return SI;
13749 }
13750 case NEON::BI__builtin_neon_vld2_v:
13751 case NEON::BI__builtin_neon_vld2q_v: {
13752 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13753 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13754 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13755 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13756 }
13757 case NEON::BI__builtin_neon_vld3_v:
13758 case NEON::BI__builtin_neon_vld3q_v: {
13759 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13760 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13761 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13762 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13763 }
13764 case NEON::BI__builtin_neon_vld4_v:
13765 case NEON::BI__builtin_neon_vld4q_v: {
13766 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13767 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13768 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13769 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13770 }
13771 case NEON::BI__builtin_neon_vld2_dup_v:
13772 case NEON::BI__builtin_neon_vld2q_dup_v: {
13773 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13774 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13775 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13776 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13777 }
13778 case NEON::BI__builtin_neon_vld3_dup_v:
13779 case NEON::BI__builtin_neon_vld3q_dup_v: {
13780 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13781 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13782 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13783 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13784 }
13785 case NEON::BI__builtin_neon_vld4_dup_v:
13786 case NEON::BI__builtin_neon_vld4q_dup_v: {
13787 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13788 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13789 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13790 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13791 }
13792 case NEON::BI__builtin_neon_vld2_lane_v:
13793 case NEON::BI__builtin_neon_vld2q_lane_v: {
13794 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13795 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13796 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13797 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13798 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13799 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13800 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13801 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13802 }
13803 case NEON::BI__builtin_neon_vld3_lane_v:
13804 case NEON::BI__builtin_neon_vld3q_lane_v: {
13805 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13806 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13807 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13808 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13809 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13810 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13811 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13812 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13813 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13814 }
13815 case NEON::BI__builtin_neon_vld4_lane_v:
13816 case NEON::BI__builtin_neon_vld4q_lane_v: {
13817 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13818 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13819 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13820 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13821 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13822 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13823 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13824 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13825 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13826 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13827 }
13828 case NEON::BI__builtin_neon_vst2_v:
13829 case NEON::BI__builtin_neon_vst2q_v: {
13830 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13831 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13832 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13833 Ops, "");
13834 }
13835 case NEON::BI__builtin_neon_vst2_lane_v:
13836 case NEON::BI__builtin_neon_vst2q_lane_v: {
13837 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13838 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13839 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13840 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13841 Ops, "");
13842 }
13843 case NEON::BI__builtin_neon_vst3_v:
13844 case NEON::BI__builtin_neon_vst3q_v: {
13845 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13846 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13847 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13848 Ops, "");
13849 }
13850 case NEON::BI__builtin_neon_vst3_lane_v:
13851 case NEON::BI__builtin_neon_vst3q_lane_v: {
13852 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13853 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13854 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13855 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13856 Ops, "");
13857 }
13858 case NEON::BI__builtin_neon_vst4_v:
13859 case NEON::BI__builtin_neon_vst4q_v: {
13860 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13861 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13862 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13863 Ops, "");
13864 }
13865 case NEON::BI__builtin_neon_vst4_lane_v:
13866 case NEON::BI__builtin_neon_vst4q_lane_v: {
13867 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13868 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13869 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13870 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13871 Ops, "");
13872 }
13873 case NEON::BI__builtin_neon_vtrn_v:
13874 case NEON::BI__builtin_neon_vtrnq_v: {
13875 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13876 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13877 Value *SV = nullptr;
13878
13879 for (unsigned vi = 0; vi != 2; ++vi) {
13880 SmallVector<int, 16> Indices;
13881 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13882 Indices.push_back(i+vi);
13883 Indices.push_back(i+e+vi);
13884 }
13885 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13886 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13887 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13888 }
13889 return SV;
13890 }
13891 case NEON::BI__builtin_neon_vuzp_v:
13892 case NEON::BI__builtin_neon_vuzpq_v: {
13893 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13894 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13895 Value *SV = nullptr;
13896
13897 for (unsigned vi = 0; vi != 2; ++vi) {
13898 SmallVector<int, 16> Indices;
13899 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13900 Indices.push_back(2*i+vi);
13901
13902 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13903 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13904 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13905 }
13906 return SV;
13907 }
13908 case NEON::BI__builtin_neon_vzip_v:
13909 case NEON::BI__builtin_neon_vzipq_v: {
13910 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13911 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13912 Value *SV = nullptr;
13913
13914 for (unsigned vi = 0; vi != 2; ++vi) {
13915 SmallVector<int, 16> Indices;
13916 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13917 Indices.push_back((i + vi*e) >> 1);
13918 Indices.push_back(((i + vi*e) >> 1)+e);
13919 }
13920 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13921 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13922 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13923 }
13924 return SV;
13925 }
13926 case NEON::BI__builtin_neon_vqtbl1q_v: {
13927 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13928 Ops, "vtbl1");
13929 }
13930 case NEON::BI__builtin_neon_vqtbl2q_v: {
13931 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13932 Ops, "vtbl2");
13933 }
13934 case NEON::BI__builtin_neon_vqtbl3q_v: {
13935 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13936 Ops, "vtbl3");
13937 }
13938 case NEON::BI__builtin_neon_vqtbl4q_v: {
13939 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13940 Ops, "vtbl4");
13941 }
13942 case NEON::BI__builtin_neon_vqtbx1q_v: {
13943 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13944 Ops, "vtbx1");
13945 }
13946 case NEON::BI__builtin_neon_vqtbx2q_v: {
13947 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13948 Ops, "vtbx2");
13949 }
13950 case NEON::BI__builtin_neon_vqtbx3q_v: {
13951 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13952 Ops, "vtbx3");
13953 }
13954 case NEON::BI__builtin_neon_vqtbx4q_v: {
13955 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13956 Ops, "vtbx4");
13957 }
13958 case NEON::BI__builtin_neon_vsqadd_v:
13959 case NEON::BI__builtin_neon_vsqaddq_v: {
13960 Int = Intrinsic::aarch64_neon_usqadd;
13961 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13962 }
13963 case NEON::BI__builtin_neon_vuqadd_v:
13964 case NEON::BI__builtin_neon_vuqaddq_v: {
13965 Int = Intrinsic::aarch64_neon_suqadd;
13966 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13967 }
13968
13969 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13970 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13971 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13972 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13973 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13974 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13975 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13976 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13977 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13978 llvm::Type *Tys[2];
13979 Tys[0] = Ty;
13980 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13981 /*isQuad*/ false));
13982 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13983 }
13984 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13985 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13986 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13987 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13988 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13989 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13990 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13991 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13992 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13993 llvm::Type *Tys[2];
13994 Tys[0] = Ty;
13995 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13996 /*isQuad*/ true));
13997 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13998 }
13999 case NEON::BI__builtin_neon_vluti2_lane_bf16:
14000 case NEON::BI__builtin_neon_vluti2_lane_f16:
14001 case NEON::BI__builtin_neon_vluti2_lane_p16:
14002 case NEON::BI__builtin_neon_vluti2_lane_p8:
14003 case NEON::BI__builtin_neon_vluti2_lane_s16:
14004 case NEON::BI__builtin_neon_vluti2_lane_s8:
14005 case NEON::BI__builtin_neon_vluti2_lane_u16:
14006 case NEON::BI__builtin_neon_vluti2_lane_u8: {
14007 Int = Intrinsic::aarch64_neon_vluti2_lane;
14008 llvm::Type *Tys[2];
14009 Tys[0] = Ty;
14010 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14011 /*isQuad*/ false));
14012 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
14013 }
14014 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
14015 case NEON::BI__builtin_neon_vluti2q_lane_f16:
14016 case NEON::BI__builtin_neon_vluti2q_lane_p16:
14017 case NEON::BI__builtin_neon_vluti2q_lane_p8:
14018 case NEON::BI__builtin_neon_vluti2q_lane_s16:
14019 case NEON::BI__builtin_neon_vluti2q_lane_s8:
14020 case NEON::BI__builtin_neon_vluti2q_lane_u16:
14021 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
14022 Int = Intrinsic::aarch64_neon_vluti2_lane;
14023 llvm::Type *Tys[2];
14024 Tys[0] = Ty;
14025 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14026 /*isQuad*/ true));
14027 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
14028 }
14029 case NEON::BI__builtin_neon_vluti4q_lane_p8:
14030 case NEON::BI__builtin_neon_vluti4q_lane_s8:
14031 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
14032 Int = Intrinsic::aarch64_neon_vluti4q_lane;
14033 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
14034 }
14035 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
14036 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
14037 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
14038 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
14039 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
14040 }
14041 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
14042 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
14043 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
14044 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
14045 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
14046 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
14047 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
14048 }
14049 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
14050 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
14051 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
14052 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
14053 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
14054 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
14055 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
14056 }
14057
14058 case NEON::BI__builtin_neon_vamin_f16:
14059 case NEON::BI__builtin_neon_vaminq_f16:
14060 case NEON::BI__builtin_neon_vamin_f32:
14061 case NEON::BI__builtin_neon_vaminq_f32:
14062 case NEON::BI__builtin_neon_vaminq_f64: {
14063 Int = Intrinsic::aarch64_neon_famin;
14064 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
14065 }
14066 case NEON::BI__builtin_neon_vamax_f16:
14067 case NEON::BI__builtin_neon_vamaxq_f16:
14068 case NEON::BI__builtin_neon_vamax_f32:
14069 case NEON::BI__builtin_neon_vamaxq_f32:
14070 case NEON::BI__builtin_neon_vamaxq_f64: {
14071 Int = Intrinsic::aarch64_neon_famax;
14072 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
14073 }
14074 case NEON::BI__builtin_neon_vscale_f16:
14075 case NEON::BI__builtin_neon_vscaleq_f16:
14076 case NEON::BI__builtin_neon_vscale_f32:
14077 case NEON::BI__builtin_neon_vscaleq_f32:
14078 case NEON::BI__builtin_neon_vscaleq_f64: {
14079 Int = Intrinsic::aarch64_neon_fp8_fscale;
14080 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
14081 }
14082 }
14083}
14084
14085Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
14086 const CallExpr *E) {
14087 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
14088 BuiltinID == BPF::BI__builtin_btf_type_id ||
14089 BuiltinID == BPF::BI__builtin_preserve_type_info ||
14090 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
14091 "unexpected BPF builtin");
14092
14093 // A sequence number, injected into IR builtin functions, to
14094 // prevent CSE, since the only difference between the functions
14095 // may be the debuginfo metadata.
14096 static uint32_t BuiltinSeqNum;
14097
14098 switch (BuiltinID) {
14099 default:
14100 llvm_unreachable("Unexpected BPF builtin");
14101 case BPF::BI__builtin_preserve_field_info: {
14102 const Expr *Arg = E->getArg(0);
14103 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
14104
14105 if (!getDebugInfo()) {
14106 CGM.Error(E->getExprLoc(),
14107 "using __builtin_preserve_field_info() without -g");
14108 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14109 : EmitLValue(Arg).emitRawPointer(*this);
14110 }
14111
14112 // Enable underlying preserve_*_access_index() generation.
14113 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14114 IsInPreservedAIRegion = true;
14115 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14116 : EmitLValue(Arg).emitRawPointer(*this);
14117 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14118
14119 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14120 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14121
14122 // Build the IR for the preserve_field_info intrinsic.
14123 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14124 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14125 {FieldAddr->getType()});
14126 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14127 }
14128 case BPF::BI__builtin_btf_type_id:
14129 case BPF::BI__builtin_preserve_type_info: {
14130 if (!getDebugInfo()) {
14131 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14132 return nullptr;
14133 }
14134
14135 const Expr *Arg0 = E->getArg(0);
14136 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14137 Arg0->getType(), Arg0->getExprLoc());
14138
14139 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14140 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14141 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14142
14143 llvm::Function *FnDecl;
14144 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14145 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14146 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14147 else
14148 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14149 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14150 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14151 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14152 return Fn;
14153 }
14154 case BPF::BI__builtin_preserve_enum_value: {
14155 if (!getDebugInfo()) {
14156 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14157 return nullptr;
14158 }
14159
14160 const Expr *Arg0 = E->getArg(0);
14161 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14162 Arg0->getType(), Arg0->getExprLoc());
14163
14164 // Find enumerator
14165 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14166 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14167 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14168 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14169
14170 auto InitVal = Enumerator->getInitVal();
14171 std::string InitValStr;
14172 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14173 InitValStr = std::to_string(InitVal.getSExtValue());
14174 else
14175 InitValStr = std::to_string(InitVal.getZExtValue());
14176 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14177 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14178
14179 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14180 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14181 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14182
14183 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14184 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14185 CallInst *Fn =
14186 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14187 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14188 return Fn;
14189 }
14190 }
14191}
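// Illustrative sketch (not in the upstream file): the kind of BPF CO-RE source
// that reaches EmitBPFBuiltinExpr above when compiled with -g. The struct and
// function names are made up for the example; the constant second argument (0)
// requests the field's byte offset in the libbpf encoding.
//
//   struct task { int pid; };
//   unsigned field_off(struct task *t) {
//     return __builtin_preserve_field_info(t->pid, 0);
//   }
//
// The field access is emitted inside a preserved-access-index region, so the
// BPF backend can turn it into a CO-RE relocation.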
14192
14193llvm::Value *CodeGenFunction::
14194BuildVector(ArrayRef<llvm::Value*> Ops) {
14195 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14196 "Not a power-of-two sized vector!");
14197 bool AllConstants = true;
14198 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14199 AllConstants &= isa<Constant>(Ops[i]);
14200
14201 // If this is a constant vector, create a ConstantVector.
14202 if (AllConstants) {
14203 SmallVector<llvm::Constant*, 16> CstOps;
14204 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14205 CstOps.push_back(cast<Constant>(Ops[i]));
14206 return llvm::ConstantVector::get(CstOps);
14207 }
14208
14209 // Otherwise, insertelement the values to build the vector.
14210 Value *Result = llvm::PoisonValue::get(
14211 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14212
14213 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14214 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14215
14216 return Result;
14217}
14218
14219// Convert the mask from an integer type to a vector of i1.
14220static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14221 unsigned NumElts) {
14222
14223 auto *MaskTy = llvm::FixedVectorType::get(
14224 CGF.Builder.getInt1Ty(),
14225 cast<IntegerType>(Mask->getType())->getBitWidth());
14226 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14227
14228 // If we have less than 8 elements, then the starting mask was an i8 and
14229 // we need to extract down to the right number of elements.
14230 if (NumElts < 8) {
14231 int Indices[4];
14232 for (unsigned i = 0; i != NumElts; ++i)
14233 Indices[i] = i;
14234 MaskVec = CGF.Builder.CreateShuffleVector(
14235 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14236 }
14237 return MaskVec;
14238}
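// Illustrative sketch (not in the upstream file): what getMaskVecValue produces
// for a small mask. With an i8 mask value 0b00001010 and NumElts == 4:
//   %v = bitcast i8 10 to <8 x i1>               ; lane 0 = least significant bit
//   %m = shufflevector <8 x i1> %v, <8 x i1> %v,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; keep the low 4 lanes
// giving <i1 0, i1 1, i1 0, i1 1>. For NumElts >= 8 the bitcast alone already
// has the right number of lanes.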
14239
14240static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14241 Align Alignment) {
14242 Value *Ptr = Ops[0];
14243
14244 Value *MaskVec = getMaskVecValue(
14245 CGF, Ops[2],
14246 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14247
14248 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14249}
14250
14251static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14252 Align Alignment) {
14253 llvm::Type *Ty = Ops[1]->getType();
14254 Value *Ptr = Ops[0];
14255
14256 Value *MaskVec = getMaskVecValue(
14257 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14258
14259 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14260}
14261
14262static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14263 ArrayRef<Value *> Ops) {
14264 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14265 Value *Ptr = Ops[0];
14266
14267 Value *MaskVec = getMaskVecValue(
14268 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14269
14270 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14271 ResultTy);
14272 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14273}
14274
14275static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14276 ArrayRef<Value *> Ops,
14277 bool IsCompress) {
14278 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14279
14280 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14281
14282 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14283 : Intrinsic::x86_avx512_mask_expand;
14284 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14285 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14286}
14287
14288static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14289 ArrayRef<Value *> Ops) {
14290 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14291 Value *Ptr = Ops[0];
14292
14293 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14294
14295 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14296 ResultTy);
14297 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14298}
14299
14300static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14301 ArrayRef<Value *> Ops,
14302 bool InvertLHS = false) {
14303 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14304 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14305 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14306
14307 if (InvertLHS)
14308 LHS = CGF.Builder.CreateNot(LHS);
14309
14310 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14311 Ops[0]->getType());
14312}
14313
14314static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14315 Value *Amt, bool IsRight) {
14316 llvm::Type *Ty = Op0->getType();
14317
14318 // Amount may be scalar immediate, in which case create a splat vector.
14319 // Funnel shift amounts are treated as modulo and the types are all power-of-2, so
14320 // we only care about the lowest log2 bits anyway.
14321 if (Amt->getType() != Ty) {
14322 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14323 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14324 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14325 }
14326
14327 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14328 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14329 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14330}
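// Illustrative sketch (not in the upstream file): the funnel-shift lowering
// above, for a v4i32 rotate-left by an immediate of 40 (both data operands are
// the same value for a rotate). The splatted amount is taken modulo the element
// width by llvm.fshl, so 40 behaves like 8:
//   %amt = <4 x i32> <i32 40, i32 40, i32 40, i32 40>
//   %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//                                          <4 x i32> %amt)
// IsRight selects @llvm.fshr instead.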
14331
14332static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14333 bool IsSigned) {
14334 Value *Op0 = Ops[0];
14335 Value *Op1 = Ops[1];
14336 llvm::Type *Ty = Op0->getType();
14337 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14338
14339 CmpInst::Predicate Pred;
14340 switch (Imm) {
14341 case 0x0:
14342 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14343 break;
14344 case 0x1:
14345 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14346 break;
14347 case 0x2:
14348 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14349 break;
14350 case 0x3:
14351 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14352 break;
14353 case 0x4:
14354 Pred = ICmpInst::ICMP_EQ;
14355 break;
14356 case 0x5:
14357 Pred = ICmpInst::ICMP_NE;
14358 break;
14359 case 0x6:
14360 return llvm::Constant::getNullValue(Ty); // FALSE
14361 case 0x7:
14362 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14363 default:
14364 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14365 }
14366
14367 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14368 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14369 return Res;
14370}
14371
14372static Value *EmitX86Select(CodeGenFunction &CGF,
14373 Value *Mask, Value *Op0, Value *Op1) {
14374
14375 // If the mask is all ones just return first argument.
14376 if (const auto *C = dyn_cast<Constant>(Mask))
14377 if (C->isAllOnesValue())
14378 return Op0;
14379
14380 Mask = getMaskVecValue(
14381 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14382
14383 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14384}
14385
14386static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14387 Value *Mask, Value *Op0, Value *Op1) {
14388 // If the mask is all ones just return first argument.
14389 if (const auto *C = dyn_cast<Constant>(Mask))
14390 if (C->isAllOnesValue())
14391 return Op0;
14392
14393 auto *MaskTy = llvm::FixedVectorType::get(
14394 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14395 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14396 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14397 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14398}
14399
14400static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14401 unsigned NumElts, Value *MaskIn) {
14402 if (MaskIn) {
14403 const auto *C = dyn_cast<Constant>(MaskIn);
14404 if (!C || !C->isAllOnesValue())
14405 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14406 }
14407
14408 if (NumElts < 8) {
14409 int Indices[8];
14410 for (unsigned i = 0; i != NumElts; ++i)
14411 Indices[i] = i;
14412 for (unsigned i = NumElts; i != 8; ++i)
14413 Indices[i] = i % NumElts + NumElts;
14414 Cmp = CGF.Builder.CreateShuffleVector(
14415 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14416 }
14417
14418 return CGF.Builder.CreateBitCast(Cmp,
14419 IntegerType::get(CGF.getLLVMContext(),
14420 std::max(NumElts, 8U)));
14421}
14422
14423static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14424 bool Signed, ArrayRef<Value *> Ops) {
14425 assert((Ops.size() == 2 || Ops.size() == 4) &&
14426 "Unexpected number of arguments");
14427 unsigned NumElts =
14428 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14429 Value *Cmp;
14430
14431 if (CC == 3) {
14432 Cmp = Constant::getNullValue(
14433 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14434 } else if (CC == 7) {
14435 Cmp = Constant::getAllOnesValue(
14436 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14437 } else {
14438 ICmpInst::Predicate Pred;
14439 switch (CC) {
14440 default: llvm_unreachable("Unknown condition code");
14441 case 0: Pred = ICmpInst::ICMP_EQ; break;
14442 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14443 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14444 case 4: Pred = ICmpInst::ICMP_NE; break;
14445 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14446 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14447 }
14448 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14449 }
14450
14451 Value *MaskIn = nullptr;
14452 if (Ops.size() == 4)
14453 MaskIn = Ops[3];
14454
14455 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14456}
14457
14458static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14459 Value *Zero = Constant::getNullValue(In->getType());
14460 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14461}
14462
14463static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14464 ArrayRef<Value *> Ops, bool IsSigned) {
14465 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14466 llvm::Type *Ty = Ops[1]->getType();
14467
14468 Value *Res;
14469 if (Rnd != 4) {
14470 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14471 : Intrinsic::x86_avx512_uitofp_round;
14472 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14473 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14474 } else {
14475 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14476 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14477 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14478 }
14479
14480 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14481}
14482
14483// Lowers X86 FMA intrinsics to IR.
14484static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14485 ArrayRef<Value *> Ops, unsigned BuiltinID,
14486 bool IsAddSub) {
14487
14488 bool Subtract = false;
14489 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14490 switch (BuiltinID) {
14491 default: break;
14492 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14493 Subtract = true;
14494 [[fallthrough]];
14495 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14496 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14497 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14498 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14499 break;
14500 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14501 Subtract = true;
14502 [[fallthrough]];
14503 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14504 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14505 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14506 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14507 break;
14508 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14509 Subtract = true;
14510 [[fallthrough]];
14511 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14512 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14513 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14514 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14515 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14516 Subtract = true;
14517 [[fallthrough]];
14518 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14519 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14520 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14521 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14522 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14523 Subtract = true;
14524 [[fallthrough]];
14525 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14526 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14527 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14528 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14529 break;
14530 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14531 Subtract = true;
14532 [[fallthrough]];
14533 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14534 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14535 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14536 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14537 break;
14538 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14539 Subtract = true;
14540 LLVM_FALLTHROUGH;
14541 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14542 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14543 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14544 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14545 break;
14546 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14547 Subtract = true;
14548 LLVM_FALLTHROUGH;
14549 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14550 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14551 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14552 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14553 break;
14554 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14555 Subtract = true;
14556 LLVM_FALLTHROUGH;
14557 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14558 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14559 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14560 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14561 break;
14562 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14563 Subtract = true;
14564 LLVM_FALLTHROUGH;
14565 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14566 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14567 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14568 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14569 break;
14570 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14571 Subtract = true;
14572 LLVM_FALLTHROUGH;
14573 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14574 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14575 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14576 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14577 break;
14578 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14579 Subtract = true;
14580 LLVM_FALLTHROUGH;
14581 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14582 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14583 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14584 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14585 break;
14586 }
14587
14588 Value *A = Ops[0];
14589 Value *B = Ops[1];
14590 Value *C = Ops[2];
14591
14592 if (Subtract)
14593 C = CGF.Builder.CreateFNeg(C);
14594
14595 Value *Res;
14596
14597 // Use the target intrinsic when rounding is not _MM_FROUND_CUR_DIRECTION (4) or for the add/sub forms.
14598 if (IID != Intrinsic::not_intrinsic &&
14599 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14600 IsAddSub)) {
14601 Function *Intr = CGF.CGM.getIntrinsic(IID);
14602 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14603 } else {
14604 llvm::Type *Ty = A->getType();
14605 Function *FMA;
14606 if (CGF.Builder.getIsFPConstrained()) {
14607 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14608 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14609 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14610 } else {
14611 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14612 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14613 }
14614 }
14615
14616 // Handle any required masking.
14617 Value *MaskFalseVal = nullptr;
14618 switch (BuiltinID) {
14619 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14620 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14621 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14622 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14623 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14624 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14625 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14626 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14627 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14628 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14629 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14630 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14631 MaskFalseVal = Ops[0];
14632 break;
14633 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14634 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14635 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14636 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14637 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14638 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14639 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14640 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14641 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14642 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14643 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14644 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14645 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14646 break;
14647 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14648 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14649 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14650 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14651 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14652 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14653 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14654 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14655 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14656 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14657 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14658 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14659 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14660 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14661 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14662 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14663 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14664 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14665 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14666 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14667 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14668 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14669 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14670 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14671 MaskFalseVal = Ops[2];
14672 break;
14673 }
14674
14675 if (MaskFalseVal)
14676 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14677
14678 return Res;
14679}
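// Illustrative sketch (not in the upstream file): the masking step above for a
// call such as _mm512_mask_fmadd_ps(a, k, b, c), which lowers through
// __builtin_ia32_vfmaddps512_mask with default rounding. The FMA itself becomes
// llvm.fma, and the *_mask form keeps the original accumulator where the mask
// bit is clear:
//   %fma = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b,
//                                             <16 x float> %c)
//   %res = select <16 x i1> %k, <16 x float> %fma, <16 x float> %a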
14680
14681static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14682 MutableArrayRef<Value *> Ops, Value *Upper,
14683 bool ZeroMask = false, unsigned PTIdx = 0,
14684 bool NegAcc = false) {
14685 unsigned Rnd = 4;
14686 if (Ops.size() > 4)
14687 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14688
14689 if (NegAcc)
14690 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14691
14692 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14693 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14694 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14695 Value *Res;
14696 if (Rnd != 4) {
14697 Intrinsic::ID IID;
14698
14699 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14700 case 16:
14701 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14702 break;
14703 case 32:
14704 IID = Intrinsic::x86_avx512_vfmadd_f32;
14705 break;
14706 case 64:
14707 IID = Intrinsic::x86_avx512_vfmadd_f64;
14708 break;
14709 default:
14710 llvm_unreachable("Unexpected size");
14711 }
14712 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14713 {Ops[0], Ops[1], Ops[2], Ops[4]});
14714 } else if (CGF.Builder.getIsFPConstrained()) {
14715 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14716 Function *FMA = CGF.CGM.getIntrinsic(
14717 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14718 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14719 } else {
14720 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14721 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14722 }
14723 // If we have more than 3 arguments, we need to do masking.
14724 if (Ops.size() > 3) {
14725 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14726 : Ops[PTIdx];
14727
14728 // If we negated the accumulator and it is the PassThru value, we need to
14729 // bypass the negate. Conveniently, Upper should be the same thing in this
14730 // case.
14731 if (NegAcc && PTIdx == 2)
14732 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14733
14734 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14735 }
14736 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14737}
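// Illustrative sketch (not in the upstream file): the scalar path above works
// on lane 0 only. For an unmasked, default-rounding fmadd_ss-style builtin the
// emitted IR is roughly:
//   %a0 = extractelement <4 x float> %a, i64 0
//   %b0 = extractelement <4 x float> %b, i64 0
//   %c0 = extractelement <4 x float> %c, i64 0
//   %f  = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
//   %r  = insertelement <4 x float> %upper, float %f, i64 0
// Masked forms blend %f with the pass-through via EmitX86ScalarSelect first.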
14738
14739static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14740 ArrayRef<Value *> Ops) {
14741 llvm::Type *Ty = Ops[0]->getType();
14742 // Arguments have a vXi32 type so cast to vXi64.
14743 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14744 Ty->getPrimitiveSizeInBits() / 64);
14745 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14746 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14747
14748 if (IsSigned) {
14749 // Shift left then arithmetic shift right.
14750 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14751 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14752 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14753 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14754 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14755 } else {
14756 // Clear the upper bits.
14757 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14758 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14759 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14760 }
14761
14762 return CGF.Builder.CreateMul(LHS, RHS);
14763}
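// Illustrative sketch (not in the upstream file): pmuldq/pmuludq multiply only
// the low 32 bits of each 64-bit lane. After the bitcast to vXi64, the signed
// form sign-extends those low halves with a shl/ashr pair and the unsigned form
// masks them, so the final mul is an ordinary 64-bit multiply:
//   %xs = shl  <2 x i64> %x, <i64 32, i64 32>
//   %xe = ashr <2 x i64> %xs, <i64 32, i64 32>   ; sign-extend low 32 bits
//   ... likewise for %y ...
//   %p  = mul  <2 x i64> %xe, %ye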
14764
14765// Emit a masked pternlog intrinsic. This only exists because the header has to
14766// use a macro and we aren't able to pass the input argument to a pternlog
14767// builtin and a select builtin without evaluating it twice.
14768static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14769 ArrayRef<Value *> Ops) {
14770 llvm::Type *Ty = Ops[0]->getType();
14771
14772 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14773 unsigned EltWidth = Ty->getScalarSizeInBits();
14774 Intrinsic::ID IID;
14775 if (VecWidth == 128 && EltWidth == 32)
14776 IID = Intrinsic::x86_avx512_pternlog_d_128;
14777 else if (VecWidth == 256 && EltWidth == 32)
14778 IID = Intrinsic::x86_avx512_pternlog_d_256;
14779 else if (VecWidth == 512 && EltWidth == 32)
14780 IID = Intrinsic::x86_avx512_pternlog_d_512;
14781 else if (VecWidth == 128 && EltWidth == 64)
14782 IID = Intrinsic::x86_avx512_pternlog_q_128;
14783 else if (VecWidth == 256 && EltWidth == 64)
14784 IID = Intrinsic::x86_avx512_pternlog_q_256;
14785 else if (VecWidth == 512 && EltWidth == 64)
14786 IID = Intrinsic::x86_avx512_pternlog_q_512;
14787 else
14788 llvm_unreachable("Unexpected intrinsic");
14789
14790 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14791 Ops.drop_back());
14792 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14793 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14794}
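// Illustrative sketch (not in the upstream file): the pternlog immediate is an
// 8-bit truth table over the three inputs. For example 0xE8 is the majority
// function, so a header-level use might look like
//   _mm512_ternarylogic_epi32(a, b, c, 0xe8);
// The masked forms wrap the x86_avx512_pternlog_* call in the usual select
// against either Ops[0] (merge-masking) or zero (zero-masking), as above.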
14795
14796static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14797 llvm::Type *DstTy) {
14798 unsigned NumberOfElements =
14799 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14800 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14801 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14802}
14803
14804Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14805 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14806 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14807 return EmitX86CpuIs(CPUStr);
14808}
14809
14810// Convert F16 halves to floats.
14811static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14812 ArrayRef<Value *> Ops,
14813 llvm::Type *DstTy) {
14814 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14815 "Unknown cvtph2ps intrinsic");
14816
14817 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14818 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14819 Function *F =
14820 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14821 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14822 }
14823
14824 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14825 Value *Src = Ops[0];
14826
14827 // Extract the subvector.
14828 if (NumDstElts !=
14829 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14830 assert(NumDstElts == 4 && "Unexpected vector size");
14831 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14832 }
14833
14834 // Bitcast from vXi16 to vXf16.
14835 auto *HalfTy = llvm::FixedVectorType::get(
14836 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14837 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14838
14839 // Perform the fp-extension.
14840 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14841
14842 if (Ops.size() >= 3)
14843 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14844 return Res;
14845}
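// Illustrative sketch (not in the upstream file): the half-to-float conversion
// above for a 128-bit source producing four floats:
//   %lo   = shufflevector <8 x i16> %src, <8 x i16> poison,
//           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %half = bitcast <4 x i16> %lo to <4 x half>
//   %ext  = fpext <4 x half> %half to <4 x float>
// followed by a select against the pass-through operand when a mask is present.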
14846
14847Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14848
14849 llvm::Type *Int32Ty = Builder.getInt32Ty();
14850
14851 // Matching the struct layout from the compiler-rt/libgcc structure that is
14852 // filled in:
14853 // unsigned int __cpu_vendor;
14854 // unsigned int __cpu_type;
14855 // unsigned int __cpu_subtype;
14856 // unsigned int __cpu_features[1];
14857 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14858 llvm::ArrayType::get(Int32Ty, 1));
14859
14860 // Grab the global __cpu_model.
14861 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14862 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14863
14864 // Calculate the index needed to access the correct field based on the
14865 // range. Also adjust the expected value.
14866 unsigned Index;
14867 unsigned Value;
14868 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14869#define X86_VENDOR(ENUM, STRING) \
14870 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14871#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14872 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14873#define X86_CPU_TYPE(ENUM, STR) \
14874 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14875#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14876 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14877#define X86_CPU_SUBTYPE(ENUM, STR) \
14878 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14879#include "llvm/TargetParser/X86TargetParser.def"
14880 .Default({0, 0});
14881 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14882
14883 // Grab the appropriate field from __cpu_model.
14884 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14885 ConstantInt::get(Int32Ty, Index)};
14886 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14887 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14888 CharUnits::fromQuantity(4));
14889
14890 // Check the value of the field against the requested value.
14891 return Builder.CreateICmpEQ(CpuValue,
14892 llvm::ConstantInt::get(Int32Ty, Value));
14893}
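// Illustrative sketch (not in the upstream file): __builtin_cpu_is("amd") lands
// here and becomes a load of the vendor field of __cpu_model plus a compare
// against the VENDOR_AMD value from X86TargetParser.def, roughly:
//   %p = getelementptr inbounds %struct, ptr @__cpu_model, i32 0, i32 0
//   %v = load i32, ptr %p, align 4
//   %r = icmp eq i32 %v, <VENDOR_AMD>
// CPU type and subtype strings select field index 1 or 2 instead.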
14894
14895Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14896 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14897 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14898 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14899 return Builder.getFalse();
14900 return EmitX86CpuSupports(FeatureStr);
14901}
14902
14903Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14904 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14905}
14906
14907llvm::Value *
14908CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14909 Value *Result = Builder.getTrue();
14910 if (FeatureMask[0] != 0) {
14911 // Matching the struct layout from the compiler-rt/libgcc structure that is
14912 // filled in:
14913 // unsigned int __cpu_vendor;
14914 // unsigned int __cpu_type;
14915 // unsigned int __cpu_subtype;
14916 // unsigned int __cpu_features[1];
14917 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14918 llvm::ArrayType::get(Int32Ty, 1));
14919
14920 // Grab the global __cpu_model.
14921 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14922 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14923
14924 // Grab the first (0th) element from the field __cpu_features off of the
14925 // global in the struct STy.
14926 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14927 Builder.getInt32(0)};
14928 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14929 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14930 CharUnits::fromQuantity(4));
14931
14932 // Check the value of the bit corresponding to the feature requested.
14933 Value *Mask = Builder.getInt32(FeatureMask[0]);
14934 Value *Bitset = Builder.CreateAnd(Features, Mask);
14935 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14936 Result = Builder.CreateAnd(Result, Cmp);
14937 }
14938
14939 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14940 llvm::Constant *CpuFeatures2 =
14941 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14942 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14943 for (int i = 1; i != 4; ++i) {
14944 const uint32_t M = FeatureMask[i];
14945 if (!M)
14946 continue;
14947 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14948 Value *Features = Builder.CreateAlignedLoad(
14949 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14950 CharUnits::fromQuantity(4));
14951 // Check the value of the bit corresponding to the feature requested.
14952 Value *Mask = Builder.getInt32(M);
14953 Value *Bitset = Builder.CreateAnd(Features, Mask);
14954 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14955 Result = Builder.CreateAnd(Result, Cmp);
14956 }
14957
14958 return Result;
14959}
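// Illustrative sketch (not in the upstream file): each requested feature word
// above compiles to a bit test; all requested bits in a word must be set:
//   %bits = load i32, ...             ; __cpu_model.__cpu_features[0] or
//                                     ; an element of __cpu_features2
//   %and  = and i32 %bits, <mask>
//   %ok   = icmp eq i32 %and, <mask>
// and the per-word %ok values are AND'ed into the final i1 result.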
14960
14961Value *CodeGenFunction::EmitAArch64CpuInit() {
14962 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14963 llvm::FunctionCallee Func =
14964 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14965 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14966 cast<llvm::GlobalValue>(Func.getCallee())
14967 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14968 return Builder.CreateCall(Func);
14969}
14970
14971Value *CodeGenFunction::EmitRISCVCpuInit() {
14972 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14973 llvm::FunctionCallee Func =
14974 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14975 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14976 CalleeGV->setDSOLocal(true);
14977 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14978 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14979}
14980
14981Value *CodeGenFunction::EmitX86CpuInit() {
14982 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14983 /*Variadic*/ false);
14984 llvm::FunctionCallee Func =
14985 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14986 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14987 cast<llvm::GlobalValue>(Func.getCallee())
14988 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14989 return Builder.CreateCall(Func);
14990}
14991
14992Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14993 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14994 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14995 llvm::SmallVector<StringRef, 8> Features;
14996 ArgStr.split(Features, "+");
14997 for (auto &Feature : Features) {
14998 Feature = Feature.trim();
14999 if (!llvm::AArch64::parseFMVExtension(Feature))
15000 return Builder.getFalse();
15001 if (Feature != "default")
15002 Features.push_back(Feature);
15003 }
15004 return EmitAArch64CpuSupports(Features);
15005}
15006
15007llvm::Value *
15008CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15009 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
15010 Value *Result = Builder.getTrue();
15011 if (FeaturesMask != 0) {
15012 // Get features from structure in runtime library
15013 // struct {
15014 // unsigned long long features;
15015 // } __aarch64_cpu_features;
15016 llvm::Type *STy = llvm::StructType::get(Int64Ty);
15017 llvm::Constant *AArch64CPUFeatures =
15018 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
15019 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
15020 llvm::Value *CpuFeatures = Builder.CreateGEP(
15021 STy, AArch64CPUFeatures,
15022 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
15023 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
15024 CharUnits::fromQuantity(8));
15025 Value *Mask = Builder.getInt64(FeaturesMask);
15026 Value *Bitset = Builder.CreateAnd(Features, Mask);
15027 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
15028 Result = Builder.CreateAnd(Result, Cmp);
15029 }
15030 return Result;
15031}
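// Illustrative sketch (not in the upstream file): the AArch64 check mirrors the
// X86 one but uses a single 64-bit bitset. For example,
//   __builtin_cpu_supports("sve2+aes")
// ORs the FMV bits for sve2 and aes into FeaturesMask, loads
// __aarch64_cpu_features.features, and tests that both bits are set with one
// and/icmp pair.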
15032
15033Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
15034
15035 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
15036 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
15037 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
15038 return Builder.getFalse();
15039
15040 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
15041}
15042
15043static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
15044 CodeGenModule &CGM) {
15045 llvm::Type *Int32Ty = Builder.getInt32Ty();
15046 llvm::Type *Int64Ty = Builder.getInt64Ty();
15047 llvm::ArrayType *ArrayOfInt64Ty =
15048 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
15049 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
15050 llvm::Constant *RISCVFeaturesBits =
15051 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
15052 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
15053 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
15054 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
15055 IndexVal};
15056 Value *Ptr =
15057 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
15058 Value *FeaturesBit =
15059 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
15060 return FeaturesBit;
15061}
15062
15063Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15064 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
15065 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
15066
15067 for (auto Feat : FeaturesStrs) {
15068 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
15069
15070 // If there is no BitPos for this feature, skip this version.
15071 // A warning is also reported to the user during compilation.
15072 if (BitPos == -1)
15073 return Builder.getFalse();
15074
15075 RequireBitMasks[GroupID] |= (1ULL << BitPos);
15076 }
15077
15078 Value *Result = nullptr;
15079 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
15080 if (RequireBitMasks[Idx] == 0)
15081 continue;
15082
15083 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
15084 Value *Bitset =
15085 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
15086 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
15087 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
15088 }
15089
15090 assert(Result && "Should have value here.");
15091
15092 return Result;
15093}
15094
15095Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
15096 const CallExpr *E) {
15097 if (BuiltinID == Builtin::BI__builtin_cpu_is)
15098 return EmitX86CpuIs(E);
15099 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
15100 return EmitX86CpuSupports(E);
15101 if (BuiltinID == Builtin::BI__builtin_cpu_init)
15102 return EmitX86CpuInit();
15103
15104 // Handle MSVC intrinsics before argument evaluation to prevent double
15105 // evaluation.
15106 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15107 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15108
15108
15109 SmallVector<Value*, 4> Ops;
15110 bool IsMaskFCmp = false;
15111 bool IsConjFMA = false;
15112
15113 // Find out if any arguments are required to be integer constant expressions.
15114 unsigned ICEArguments = 0;
15115 ASTContext::GetBuiltinTypeError Error;
15116 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15117 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15118
15119 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15120 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15121 }
15122
15123 // These exist so that the builtin that takes an immediate can be bounds
15124 // checked by clang to avoid passing bad immediates to the backend. Since
15125 // AVX has a larger immediate than SSE we would need separate builtins to
15126 // do the different bounds checking. Rather than create a clang specific
15127 // SSE only builtin, this implements eight separate builtins to match gcc
15128 // implementation.
15129 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15130 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15131 llvm::Function *F = CGM.getIntrinsic(ID);
15132 return Builder.CreateCall(F, Ops);
15133 };
15134
15135 // For the vector forms of FP comparisons, translate the builtins directly to
15136 // IR.
15137 // TODO: The builtins could be removed if the SSE header files used vector
15138 // extension comparisons directly (vector ordered/unordered may need
15139 // additional support via __builtin_isnan()).
15140 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15141 bool IsSignaling) {
15142 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15143 Value *Cmp;
15144 if (IsSignaling)
15145 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15146 else
15147 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15148 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15149 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15150 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15151 return Builder.CreateBitCast(Sext, FPVecTy);
15152 };
15153
15154 switch (BuiltinID) {
15155 default: return nullptr;
15156 case X86::BI_mm_prefetch: {
15157 Value *Address = Ops[0];
15158 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15159 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15160 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15161 Value *Data = ConstantInt::get(Int32Ty, 1);
15162 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15163 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15164 }
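// Worked example (editorial, not part of the original source): _MM_HINT_T0
// is 3, so RW = (3 >> 2) & 1 = 0 (read) and Locality = 3 & 3 = 3, while
// _MM_HINT_ET0 (7) yields RW = 1 (write) with the same locality; Data = 1
// marks this as a data rather than an instruction prefetch.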
15165 case X86::BI_mm_clflush: {
15166 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15167 Ops[0]);
15168 }
15169 case X86::BI_mm_lfence: {
15170 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15171 }
15172 case X86::BI_mm_mfence: {
15173 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15174 }
15175 case X86::BI_mm_sfence: {
15176 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15177 }
15178 case X86::BI_mm_pause: {
15179 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15180 }
15181 case X86::BI__rdtsc: {
15182 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15183 }
15184 case X86::BI__builtin_ia32_rdtscp: {
15185 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15186 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15187 Ops[0]);
15188 return Builder.CreateExtractValue(Call, 0);
15189 }
15190 case X86::BI__builtin_ia32_lzcnt_u16:
15191 case X86::BI__builtin_ia32_lzcnt_u32:
15192 case X86::BI__builtin_ia32_lzcnt_u64: {
15193 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15194 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15195 }
15196 case X86::BI__builtin_ia32_tzcnt_u16:
15197 case X86::BI__builtin_ia32_tzcnt_u32:
15198 case X86::BI__builtin_ia32_tzcnt_u64: {
15199 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15200 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15201 }
15202 case X86::BI__builtin_ia32_undef128:
15203 case X86::BI__builtin_ia32_undef256:
15204 case X86::BI__builtin_ia32_undef512:
15205 // The x86 definition of "undef" is not the same as the LLVM definition
15206 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15207 // IR optimizer and backend.
15208 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15209 // value, we should use that here instead of a zero.
15210 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15211 case X86::BI__builtin_ia32_vec_ext_v4hi:
15212 case X86::BI__builtin_ia32_vec_ext_v16qi:
15213 case X86::BI__builtin_ia32_vec_ext_v8hi:
15214 case X86::BI__builtin_ia32_vec_ext_v4si:
15215 case X86::BI__builtin_ia32_vec_ext_v4sf:
15216 case X86::BI__builtin_ia32_vec_ext_v2di:
15217 case X86::BI__builtin_ia32_vec_ext_v32qi:
15218 case X86::BI__builtin_ia32_vec_ext_v16hi:
15219 case X86::BI__builtin_ia32_vec_ext_v8si:
15220 case X86::BI__builtin_ia32_vec_ext_v4di: {
15221 unsigned NumElts =
15222 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15223 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15224 Index &= NumElts - 1;
15225 // These builtins exist so we can ensure the index is an ICE and in range.
15226 // Otherwise we could just do this in the header file.
15227 return Builder.CreateExtractElement(Ops[0], Index);
15228 }
15229 case X86::BI__builtin_ia32_vec_set_v4hi:
15230 case X86::BI__builtin_ia32_vec_set_v16qi:
15231 case X86::BI__builtin_ia32_vec_set_v8hi:
15232 case X86::BI__builtin_ia32_vec_set_v4si:
15233 case X86::BI__builtin_ia32_vec_set_v2di:
15234 case X86::BI__builtin_ia32_vec_set_v32qi:
15235 case X86::BI__builtin_ia32_vec_set_v16hi:
15236 case X86::BI__builtin_ia32_vec_set_v8si:
15237 case X86::BI__builtin_ia32_vec_set_v4di: {
15238 unsigned NumElts =
15239 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15240 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15241 Index &= NumElts - 1;
15242 // These builtins exist so we can ensure the index is an ICE and in range.
15243 // Otherwise we could just do this in the header file.
15244 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15245 }
15246 case X86::BI_mm_setcsr:
15247 case X86::BI__builtin_ia32_ldmxcsr: {
15248 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15249 Builder.CreateStore(Ops[0], Tmp);
15250 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15251 Tmp.getPointer());
15252 }
15253 case X86::BI_mm_getcsr:
15254 case X86::BI__builtin_ia32_stmxcsr: {
15255 RawAddress Tmp = CreateMemTemp(E->getType());
15256 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15257 Tmp.getPointer());
15258 return Builder.CreateLoad(Tmp, "stmxcsr");
15259 }
15260 case X86::BI__builtin_ia32_xsave:
15261 case X86::BI__builtin_ia32_xsave64:
15262 case X86::BI__builtin_ia32_xrstor:
15263 case X86::BI__builtin_ia32_xrstor64:
15264 case X86::BI__builtin_ia32_xsaveopt:
15265 case X86::BI__builtin_ia32_xsaveopt64:
15266 case X86::BI__builtin_ia32_xrstors:
15267 case X86::BI__builtin_ia32_xrstors64:
15268 case X86::BI__builtin_ia32_xsavec:
15269 case X86::BI__builtin_ia32_xsavec64:
15270 case X86::BI__builtin_ia32_xsaves:
15271 case X86::BI__builtin_ia32_xsaves64:
15272 case X86::BI__builtin_ia32_xsetbv:
15273 case X86::BI_xsetbv: {
15274 Intrinsic::ID ID;
15275#define INTRINSIC_X86_XSAVE_ID(NAME) \
15276 case X86::BI__builtin_ia32_##NAME: \
15277 ID = Intrinsic::x86_##NAME; \
15278 break
15279 switch (BuiltinID) {
15280 default: llvm_unreachable("Unsupported intrinsic!");
15281 INTRINSIC_X86_XSAVE_ID(xsave);
15282 INTRINSIC_X86_XSAVE_ID(xsave64);
15283 INTRINSIC_X86_XSAVE_ID(xrstor);
15284 INTRINSIC_X86_XSAVE_ID(xrstor64);
15285 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15286 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15287 INTRINSIC_X86_XSAVE_ID(xrstors);
15288 INTRINSIC_X86_XSAVE_ID(xrstors64);
15289 INTRINSIC_X86_XSAVE_ID(xsavec);
15290 INTRINSIC_X86_XSAVE_ID(xsavec64);
15291 INTRINSIC_X86_XSAVE_ID(xsaves);
15292 INTRINSIC_X86_XSAVE_ID(xsaves64);
15293 INTRINSIC_X86_XSAVE_ID(xsetbv);
15294 case X86::BI_xsetbv:
15295 ID = Intrinsic::x86_xsetbv;
15296 break;
15297 }
15298#undef INTRINSIC_X86_XSAVE_ID
15299 Value *Mhi = Builder.CreateTrunc(
15300 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15301 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15302 Ops[1] = Mhi;
15303 Ops.push_back(Mlo);
15304 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15305 }
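// Worked example (editorial, not part of the original source): the XSAVE and
// XSETBV intrinsics take the 64-bit state mask as an EDX:EAX pair, so a mask
// of 0x0000000200000007 is split into Mhi = 0x2 and Mlo = 0x7 before the
// intrinsic call.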
15306 case X86::BI__builtin_ia32_xgetbv:
15307 case X86::BI_xgetbv:
15308 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15309 case X86::BI__builtin_ia32_storedqudi128_mask:
15310 case X86::BI__builtin_ia32_storedqusi128_mask:
15311 case X86::BI__builtin_ia32_storedquhi128_mask:
15312 case X86::BI__builtin_ia32_storedquqi128_mask:
15313 case X86::BI__builtin_ia32_storeupd128_mask:
15314 case X86::BI__builtin_ia32_storeups128_mask:
15315 case X86::BI__builtin_ia32_storedqudi256_mask:
15316 case X86::BI__builtin_ia32_storedqusi256_mask:
15317 case X86::BI__builtin_ia32_storedquhi256_mask:
15318 case X86::BI__builtin_ia32_storedquqi256_mask:
15319 case X86::BI__builtin_ia32_storeupd256_mask:
15320 case X86::BI__builtin_ia32_storeups256_mask:
15321 case X86::BI__builtin_ia32_storedqudi512_mask:
15322 case X86::BI__builtin_ia32_storedqusi512_mask:
15323 case X86::BI__builtin_ia32_storedquhi512_mask:
15324 case X86::BI__builtin_ia32_storedquqi512_mask:
15325 case X86::BI__builtin_ia32_storeupd512_mask:
15326 case X86::BI__builtin_ia32_storeups512_mask:
15327 return EmitX86MaskedStore(*this, Ops, Align(1));
15328
15329 case X86::BI__builtin_ia32_storesbf16128_mask:
15330 case X86::BI__builtin_ia32_storesh128_mask:
15331 case X86::BI__builtin_ia32_storess128_mask:
15332 case X86::BI__builtin_ia32_storesd128_mask:
15333 return EmitX86MaskedStore(*this, Ops, Align(1));
15334
15335 case X86::BI__builtin_ia32_cvtmask2b128:
15336 case X86::BI__builtin_ia32_cvtmask2b256:
15337 case X86::BI__builtin_ia32_cvtmask2b512:
15338 case X86::BI__builtin_ia32_cvtmask2w128:
15339 case X86::BI__builtin_ia32_cvtmask2w256:
15340 case X86::BI__builtin_ia32_cvtmask2w512:
15341 case X86::BI__builtin_ia32_cvtmask2d128:
15342 case X86::BI__builtin_ia32_cvtmask2d256:
15343 case X86::BI__builtin_ia32_cvtmask2d512:
15344 case X86::BI__builtin_ia32_cvtmask2q128:
15345 case X86::BI__builtin_ia32_cvtmask2q256:
15346 case X86::BI__builtin_ia32_cvtmask2q512:
15347 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15348
15349 case X86::BI__builtin_ia32_cvtb2mask128:
15350 case X86::BI__builtin_ia32_cvtb2mask256:
15351 case X86::BI__builtin_ia32_cvtb2mask512:
15352 case X86::BI__builtin_ia32_cvtw2mask128:
15353 case X86::BI__builtin_ia32_cvtw2mask256:
15354 case X86::BI__builtin_ia32_cvtw2mask512:
15355 case X86::BI__builtin_ia32_cvtd2mask128:
15356 case X86::BI__builtin_ia32_cvtd2mask256:
15357 case X86::BI__builtin_ia32_cvtd2mask512:
15358 case X86::BI__builtin_ia32_cvtq2mask128:
15359 case X86::BI__builtin_ia32_cvtq2mask256:
15360 case X86::BI__builtin_ia32_cvtq2mask512:
15361 return EmitX86ConvertToMask(*this, Ops[0]);
15362
15363 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15364 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15365 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15366 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15367 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15368 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15369 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15370 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15371 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15372 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15373 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15374 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15375 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15376 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15377 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15378 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15379 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15380 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15381 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15382 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15383 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15384 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15385 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15386 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15387 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15388 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15389
15390 case X86::BI__builtin_ia32_vfmaddss3:
15391 case X86::BI__builtin_ia32_vfmaddsd3:
15392 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15393 case X86::BI__builtin_ia32_vfmaddss3_mask:
15394 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15395 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15396 case X86::BI__builtin_ia32_vfmaddss:
15397 case X86::BI__builtin_ia32_vfmaddsd:
15398 return EmitScalarFMAExpr(*this, E, Ops,
15399 Constant::getNullValue(Ops[0]->getType()));
15400 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15401 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15402 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15403 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15404 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15405 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15406 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15407 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15408 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15409 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15410 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15411 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15412 /*NegAcc*/ true);
15413 case X86::BI__builtin_ia32_vfmaddph:
15414 case X86::BI__builtin_ia32_vfmaddps:
15415 case X86::BI__builtin_ia32_vfmaddpd:
15416 case X86::BI__builtin_ia32_vfmaddph256:
15417 case X86::BI__builtin_ia32_vfmaddps256:
15418 case X86::BI__builtin_ia32_vfmaddpd256:
15419 case X86::BI__builtin_ia32_vfmaddph512_mask:
15420 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15421 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15422 case X86::BI__builtin_ia32_vfmaddnepbh128:
15423 case X86::BI__builtin_ia32_vfmaddnepbh256:
15424 case X86::BI__builtin_ia32_vfmaddnepbh512:
15425 case X86::BI__builtin_ia32_vfmaddps512_mask:
15426 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15427 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15428 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15429 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15430 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15431 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15432 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15433 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15434 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15435 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15436 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15437 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15438 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15439 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15440 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15441 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15442 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15443 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15444 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15445 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15446 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15447 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15448 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15449 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15450 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15451 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15452 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15453 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15454 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15455 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15456 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15457 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15458 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15459 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15460 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15461 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15462 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15463 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15464 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15465 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15466 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15467 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15468 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15469 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15470 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15471 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15472
15473 case X86::BI__builtin_ia32_movdqa32store128_mask:
15474 case X86::BI__builtin_ia32_movdqa64store128_mask:
15475 case X86::BI__builtin_ia32_storeaps128_mask:
15476 case X86::BI__builtin_ia32_storeapd128_mask:
15477 case X86::BI__builtin_ia32_movdqa32store256_mask:
15478 case X86::BI__builtin_ia32_movdqa64store256_mask:
15479 case X86::BI__builtin_ia32_storeaps256_mask:
15480 case X86::BI__builtin_ia32_storeapd256_mask:
15481 case X86::BI__builtin_ia32_movdqa32store512_mask:
15482 case X86::BI__builtin_ia32_movdqa64store512_mask:
15483 case X86::BI__builtin_ia32_storeaps512_mask:
15484 case X86::BI__builtin_ia32_storeapd512_mask:
15485 return EmitX86MaskedStore(
15486 *this, Ops,
15487 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15488
15489 case X86::BI__builtin_ia32_loadups128_mask:
15490 case X86::BI__builtin_ia32_loadups256_mask:
15491 case X86::BI__builtin_ia32_loadups512_mask:
15492 case X86::BI__builtin_ia32_loadupd128_mask:
15493 case X86::BI__builtin_ia32_loadupd256_mask:
15494 case X86::BI__builtin_ia32_loadupd512_mask:
15495 case X86::BI__builtin_ia32_loaddquqi128_mask:
15496 case X86::BI__builtin_ia32_loaddquqi256_mask:
15497 case X86::BI__builtin_ia32_loaddquqi512_mask:
15498 case X86::BI__builtin_ia32_loaddquhi128_mask:
15499 case X86::BI__builtin_ia32_loaddquhi256_mask:
15500 case X86::BI__builtin_ia32_loaddquhi512_mask:
15501 case X86::BI__builtin_ia32_loaddqusi128_mask:
15502 case X86::BI__builtin_ia32_loaddqusi256_mask:
15503 case X86::BI__builtin_ia32_loaddqusi512_mask:
15504 case X86::BI__builtin_ia32_loaddqudi128_mask:
15505 case X86::BI__builtin_ia32_loaddqudi256_mask:
15506 case X86::BI__builtin_ia32_loaddqudi512_mask:
15507 return EmitX86MaskedLoad(*this, Ops, Align(1));
15508
15509 case X86::BI__builtin_ia32_loadsbf16128_mask:
15510 case X86::BI__builtin_ia32_loadsh128_mask:
15511 case X86::BI__builtin_ia32_loadss128_mask:
15512 case X86::BI__builtin_ia32_loadsd128_mask:
15513 return EmitX86MaskedLoad(*this, Ops, Align(1));
15514
15515 case X86::BI__builtin_ia32_loadaps128_mask:
15516 case X86::BI__builtin_ia32_loadaps256_mask:
15517 case X86::BI__builtin_ia32_loadaps512_mask:
15518 case X86::BI__builtin_ia32_loadapd128_mask:
15519 case X86::BI__builtin_ia32_loadapd256_mask:
15520 case X86::BI__builtin_ia32_loadapd512_mask:
15521 case X86::BI__builtin_ia32_movdqa32load128_mask:
15522 case X86::BI__builtin_ia32_movdqa32load256_mask:
15523 case X86::BI__builtin_ia32_movdqa32load512_mask:
15524 case X86::BI__builtin_ia32_movdqa64load128_mask:
15525 case X86::BI__builtin_ia32_movdqa64load256_mask:
15526 case X86::BI__builtin_ia32_movdqa64load512_mask:
15527 return EmitX86MaskedLoad(
15528 *this, Ops,
15529 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15530
15531 case X86::BI__builtin_ia32_expandloaddf128_mask:
15532 case X86::BI__builtin_ia32_expandloaddf256_mask:
15533 case X86::BI__builtin_ia32_expandloaddf512_mask:
15534 case X86::BI__builtin_ia32_expandloadsf128_mask:
15535 case X86::BI__builtin_ia32_expandloadsf256_mask:
15536 case X86::BI__builtin_ia32_expandloadsf512_mask:
15537 case X86::BI__builtin_ia32_expandloaddi128_mask:
15538 case X86::BI__builtin_ia32_expandloaddi256_mask:
15539 case X86::BI__builtin_ia32_expandloaddi512_mask:
15540 case X86::BI__builtin_ia32_expandloadsi128_mask:
15541 case X86::BI__builtin_ia32_expandloadsi256_mask:
15542 case X86::BI__builtin_ia32_expandloadsi512_mask:
15543 case X86::BI__builtin_ia32_expandloadhi128_mask:
15544 case X86::BI__builtin_ia32_expandloadhi256_mask:
15545 case X86::BI__builtin_ia32_expandloadhi512_mask:
15546 case X86::BI__builtin_ia32_expandloadqi128_mask:
15547 case X86::BI__builtin_ia32_expandloadqi256_mask:
15548 case X86::BI__builtin_ia32_expandloadqi512_mask:
15549 return EmitX86ExpandLoad(*this, Ops);
15550
15551 case X86::BI__builtin_ia32_compressstoredf128_mask:
15552 case X86::BI__builtin_ia32_compressstoredf256_mask:
15553 case X86::BI__builtin_ia32_compressstoredf512_mask:
15554 case X86::BI__builtin_ia32_compressstoresf128_mask:
15555 case X86::BI__builtin_ia32_compressstoresf256_mask:
15556 case X86::BI__builtin_ia32_compressstoresf512_mask:
15557 case X86::BI__builtin_ia32_compressstoredi128_mask:
15558 case X86::BI__builtin_ia32_compressstoredi256_mask:
15559 case X86::BI__builtin_ia32_compressstoredi512_mask:
15560 case X86::BI__builtin_ia32_compressstoresi128_mask:
15561 case X86::BI__builtin_ia32_compressstoresi256_mask:
15562 case X86::BI__builtin_ia32_compressstoresi512_mask:
15563 case X86::BI__builtin_ia32_compressstorehi128_mask:
15564 case X86::BI__builtin_ia32_compressstorehi256_mask:
15565 case X86::BI__builtin_ia32_compressstorehi512_mask:
15566 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15567 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15568 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15569 return EmitX86CompressStore(*this, Ops);
15570
15571 case X86::BI__builtin_ia32_expanddf128_mask:
15572 case X86::BI__builtin_ia32_expanddf256_mask:
15573 case X86::BI__builtin_ia32_expanddf512_mask:
15574 case X86::BI__builtin_ia32_expandsf128_mask:
15575 case X86::BI__builtin_ia32_expandsf256_mask:
15576 case X86::BI__builtin_ia32_expandsf512_mask:
15577 case X86::BI__builtin_ia32_expanddi128_mask:
15578 case X86::BI__builtin_ia32_expanddi256_mask:
15579 case X86::BI__builtin_ia32_expanddi512_mask:
15580 case X86::BI__builtin_ia32_expandsi128_mask:
15581 case X86::BI__builtin_ia32_expandsi256_mask:
15582 case X86::BI__builtin_ia32_expandsi512_mask:
15583 case X86::BI__builtin_ia32_expandhi128_mask:
15584 case X86::BI__builtin_ia32_expandhi256_mask:
15585 case X86::BI__builtin_ia32_expandhi512_mask:
15586 case X86::BI__builtin_ia32_expandqi128_mask:
15587 case X86::BI__builtin_ia32_expandqi256_mask:
15588 case X86::BI__builtin_ia32_expandqi512_mask:
15589 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15590
15591 case X86::BI__builtin_ia32_compressdf128_mask:
15592 case X86::BI__builtin_ia32_compressdf256_mask:
15593 case X86::BI__builtin_ia32_compressdf512_mask:
15594 case X86::BI__builtin_ia32_compresssf128_mask:
15595 case X86::BI__builtin_ia32_compresssf256_mask:
15596 case X86::BI__builtin_ia32_compresssf512_mask:
15597 case X86::BI__builtin_ia32_compressdi128_mask:
15598 case X86::BI__builtin_ia32_compressdi256_mask:
15599 case X86::BI__builtin_ia32_compressdi512_mask:
15600 case X86::BI__builtin_ia32_compresssi128_mask:
15601 case X86::BI__builtin_ia32_compresssi256_mask:
15602 case X86::BI__builtin_ia32_compresssi512_mask:
15603 case X86::BI__builtin_ia32_compresshi128_mask:
15604 case X86::BI__builtin_ia32_compresshi256_mask:
15605 case X86::BI__builtin_ia32_compresshi512_mask:
15606 case X86::BI__builtin_ia32_compressqi128_mask:
15607 case X86::BI__builtin_ia32_compressqi256_mask:
15608 case X86::BI__builtin_ia32_compressqi512_mask:
15609 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15610
15611 case X86::BI__builtin_ia32_gather3div2df:
15612 case X86::BI__builtin_ia32_gather3div2di:
15613 case X86::BI__builtin_ia32_gather3div4df:
15614 case X86::BI__builtin_ia32_gather3div4di:
15615 case X86::BI__builtin_ia32_gather3div4sf:
15616 case X86::BI__builtin_ia32_gather3div4si:
15617 case X86::BI__builtin_ia32_gather3div8sf:
15618 case X86::BI__builtin_ia32_gather3div8si:
15619 case X86::BI__builtin_ia32_gather3siv2df:
15620 case X86::BI__builtin_ia32_gather3siv2di:
15621 case X86::BI__builtin_ia32_gather3siv4df:
15622 case X86::BI__builtin_ia32_gather3siv4di:
15623 case X86::BI__builtin_ia32_gather3siv4sf:
15624 case X86::BI__builtin_ia32_gather3siv4si:
15625 case X86::BI__builtin_ia32_gather3siv8sf:
15626 case X86::BI__builtin_ia32_gather3siv8si:
15627 case X86::BI__builtin_ia32_gathersiv8df:
15628 case X86::BI__builtin_ia32_gathersiv16sf:
15629 case X86::BI__builtin_ia32_gatherdiv8df:
15630 case X86::BI__builtin_ia32_gatherdiv16sf:
15631 case X86::BI__builtin_ia32_gathersiv8di:
15632 case X86::BI__builtin_ia32_gathersiv16si:
15633 case X86::BI__builtin_ia32_gatherdiv8di:
15634 case X86::BI__builtin_ia32_gatherdiv16si: {
15635 Intrinsic::ID IID;
15636 switch (BuiltinID) {
15637 default: llvm_unreachable("Unexpected builtin");
15638 case X86::BI__builtin_ia32_gather3div2df:
15639 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15640 break;
15641 case X86::BI__builtin_ia32_gather3div2di:
15642 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15643 break;
15644 case X86::BI__builtin_ia32_gather3div4df:
15645 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15646 break;
15647 case X86::BI__builtin_ia32_gather3div4di:
15648 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15649 break;
15650 case X86::BI__builtin_ia32_gather3div4sf:
15651 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15652 break;
15653 case X86::BI__builtin_ia32_gather3div4si:
15654 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15655 break;
15656 case X86::BI__builtin_ia32_gather3div8sf:
15657 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15658 break;
15659 case X86::BI__builtin_ia32_gather3div8si:
15660 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15661 break;
15662 case X86::BI__builtin_ia32_gather3siv2df:
15663 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15664 break;
15665 case X86::BI__builtin_ia32_gather3siv2di:
15666 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15667 break;
15668 case X86::BI__builtin_ia32_gather3siv4df:
15669 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15670 break;
15671 case X86::BI__builtin_ia32_gather3siv4di:
15672 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15673 break;
15674 case X86::BI__builtin_ia32_gather3siv4sf:
15675 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15676 break;
15677 case X86::BI__builtin_ia32_gather3siv4si:
15678 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15679 break;
15680 case X86::BI__builtin_ia32_gather3siv8sf:
15681 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15682 break;
15683 case X86::BI__builtin_ia32_gather3siv8si:
15684 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15685 break;
15686 case X86::BI__builtin_ia32_gathersiv8df:
15687 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15688 break;
15689 case X86::BI__builtin_ia32_gathersiv16sf:
15690 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15691 break;
15692 case X86::BI__builtin_ia32_gatherdiv8df:
15693 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15694 break;
15695 case X86::BI__builtin_ia32_gatherdiv16sf:
15696 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15697 break;
15698 case X86::BI__builtin_ia32_gathersiv8di:
15699 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15700 break;
15701 case X86::BI__builtin_ia32_gathersiv16si:
15702 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15703 break;
15704 case X86::BI__builtin_ia32_gatherdiv8di:
15705 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15706 break;
15707 case X86::BI__builtin_ia32_gatherdiv16si:
15708 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15709 break;
15710 }
15711
15712 unsigned MinElts = std::min(
15713 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15714 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15715 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15716 Function *Intr = CGM.getIntrinsic(IID);
15717 return Builder.CreateCall(Intr, Ops);
15718 }
15719
15720 case X86::BI__builtin_ia32_scattersiv8df:
15721 case X86::BI__builtin_ia32_scattersiv16sf:
15722 case X86::BI__builtin_ia32_scatterdiv8df:
15723 case X86::BI__builtin_ia32_scatterdiv16sf:
15724 case X86::BI__builtin_ia32_scattersiv8di:
15725 case X86::BI__builtin_ia32_scattersiv16si:
15726 case X86::BI__builtin_ia32_scatterdiv8di:
15727 case X86::BI__builtin_ia32_scatterdiv16si:
15728 case X86::BI__builtin_ia32_scatterdiv2df:
15729 case X86::BI__builtin_ia32_scatterdiv2di:
15730 case X86::BI__builtin_ia32_scatterdiv4df:
15731 case X86::BI__builtin_ia32_scatterdiv4di:
15732 case X86::BI__builtin_ia32_scatterdiv4sf:
15733 case X86::BI__builtin_ia32_scatterdiv4si:
15734 case X86::BI__builtin_ia32_scatterdiv8sf:
15735 case X86::BI__builtin_ia32_scatterdiv8si:
15736 case X86::BI__builtin_ia32_scattersiv2df:
15737 case X86::BI__builtin_ia32_scattersiv2di:
15738 case X86::BI__builtin_ia32_scattersiv4df:
15739 case X86::BI__builtin_ia32_scattersiv4di:
15740 case X86::BI__builtin_ia32_scattersiv4sf:
15741 case X86::BI__builtin_ia32_scattersiv4si:
15742 case X86::BI__builtin_ia32_scattersiv8sf:
15743 case X86::BI__builtin_ia32_scattersiv8si: {
15744 Intrinsic::ID IID;
15745 switch (BuiltinID) {
15746 default: llvm_unreachable("Unexpected builtin");
15747 case X86::BI__builtin_ia32_scattersiv8df:
15748 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15749 break;
15750 case X86::BI__builtin_ia32_scattersiv16sf:
15751 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15752 break;
15753 case X86::BI__builtin_ia32_scatterdiv8df:
15754 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15755 break;
15756 case X86::BI__builtin_ia32_scatterdiv16sf:
15757 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15758 break;
15759 case X86::BI__builtin_ia32_scattersiv8di:
15760 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15761 break;
15762 case X86::BI__builtin_ia32_scattersiv16si:
15763 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15764 break;
15765 case X86::BI__builtin_ia32_scatterdiv8di:
15766 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15767 break;
15768 case X86::BI__builtin_ia32_scatterdiv16si:
15769 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15770 break;
15771 case X86::BI__builtin_ia32_scatterdiv2df:
15772 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15773 break;
15774 case X86::BI__builtin_ia32_scatterdiv2di:
15775 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15776 break;
15777 case X86::BI__builtin_ia32_scatterdiv4df:
15778 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15779 break;
15780 case X86::BI__builtin_ia32_scatterdiv4di:
15781 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15782 break;
15783 case X86::BI__builtin_ia32_scatterdiv4sf:
15784 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15785 break;
15786 case X86::BI__builtin_ia32_scatterdiv4si:
15787 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15788 break;
15789 case X86::BI__builtin_ia32_scatterdiv8sf:
15790 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15791 break;
15792 case X86::BI__builtin_ia32_scatterdiv8si:
15793 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15794 break;
15795 case X86::BI__builtin_ia32_scattersiv2df:
15796 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15797 break;
15798 case X86::BI__builtin_ia32_scattersiv2di:
15799 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15800 break;
15801 case X86::BI__builtin_ia32_scattersiv4df:
15802 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15803 break;
15804 case X86::BI__builtin_ia32_scattersiv4di:
15805 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15806 break;
15807 case X86::BI__builtin_ia32_scattersiv4sf:
15808 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15809 break;
15810 case X86::BI__builtin_ia32_scattersiv4si:
15811 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15812 break;
15813 case X86::BI__builtin_ia32_scattersiv8sf:
15814 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15815 break;
15816 case X86::BI__builtin_ia32_scattersiv8si:
15817 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15818 break;
15819 }
15820
15821 unsigned MinElts = std::min(
15822 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15823 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15824 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15825 Function *Intr = CGM.getIntrinsic(IID);
15826 return Builder.CreateCall(Intr, Ops);
15827 }
15828
15829 case X86::BI__builtin_ia32_vextractf128_pd256:
15830 case X86::BI__builtin_ia32_vextractf128_ps256:
15831 case X86::BI__builtin_ia32_vextractf128_si256:
15832 case X86::BI__builtin_ia32_extract128i256:
15833 case X86::BI__builtin_ia32_extractf64x4_mask:
15834 case X86::BI__builtin_ia32_extractf32x4_mask:
15835 case X86::BI__builtin_ia32_extracti64x4_mask:
15836 case X86::BI__builtin_ia32_extracti32x4_mask:
15837 case X86::BI__builtin_ia32_extractf32x8_mask:
15838 case X86::BI__builtin_ia32_extracti32x8_mask:
15839 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15840 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15841 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15842 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15843 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15844 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15845 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15846 unsigned NumElts = DstTy->getNumElements();
15847 unsigned SrcNumElts =
15848 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15849 unsigned SubVectors = SrcNumElts / NumElts;
15850 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15851 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15852 Index &= SubVectors - 1; // Remove any extra bits.
15853 Index *= NumElts;
15854
15855 int Indices[16];
15856 for (unsigned i = 0; i != NumElts; ++i)
15857 Indices[i] = i + Index;
15858
15859 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15860 "extract");
15861
15862 if (Ops.size() == 4)
15863 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15864
15865 return Res;
15866 }
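// Worked example (editorial, not part of the original source): extracting
// 128-bit lane 1 from a 512-bit <16 x float> gives NumElts = 4,
// SubVectors = 4 and Index = 1 * 4 = 4, so the shuffle mask is <4,5,6,7>;
// the 4-operand masked forms then blend the result with the passthru via
// EmitX86Select.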
15867 case X86::BI__builtin_ia32_vinsertf128_pd256:
15868 case X86::BI__builtin_ia32_vinsertf128_ps256:
15869 case X86::BI__builtin_ia32_vinsertf128_si256:
15870 case X86::BI__builtin_ia32_insert128i256:
15871 case X86::BI__builtin_ia32_insertf64x4:
15872 case X86::BI__builtin_ia32_insertf32x4:
15873 case X86::BI__builtin_ia32_inserti64x4:
15874 case X86::BI__builtin_ia32_inserti32x4:
15875 case X86::BI__builtin_ia32_insertf32x8:
15876 case X86::BI__builtin_ia32_inserti32x8:
15877 case X86::BI__builtin_ia32_insertf32x4_256:
15878 case X86::BI__builtin_ia32_inserti32x4_256:
15879 case X86::BI__builtin_ia32_insertf64x2_256:
15880 case X86::BI__builtin_ia32_inserti64x2_256:
15881 case X86::BI__builtin_ia32_insertf64x2_512:
15882 case X86::BI__builtin_ia32_inserti64x2_512: {
15883 unsigned DstNumElts =
15884 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15885 unsigned SrcNumElts =
15886 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15887 unsigned SubVectors = DstNumElts / SrcNumElts;
15888 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15889 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15890 Index &= SubVectors - 1; // Remove any extra bits.
15891 Index *= SrcNumElts;
15892
15893 int Indices[16];
15894 for (unsigned i = 0; i != DstNumElts; ++i)
15895 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15896
15897 Value *Op1 = Builder.CreateShuffleVector(
15898 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15899
15900 for (unsigned i = 0; i != DstNumElts; ++i) {
15901 if (i >= Index && i < (Index + SrcNumElts))
15902 Indices[i] = (i - Index) + DstNumElts;
15903 else
15904 Indices[i] = i;
15905 }
15906
15907 return Builder.CreateShuffleVector(Ops[0], Op1,
15908 ArrayRef(Indices, DstNumElts), "insert");
15909 }
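// Worked example (editorial, not part of the original source): inserting a
// <4 x float> at position 2 of a <16 x float> first widens the source to 16
// elements (indices past the first four reference the implicit poison
// operand), then picks widened elements 16..19 for result positions 8..11
// and the original destination elements everywhere else.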
15910 case X86::BI__builtin_ia32_pmovqd512_mask:
15911 case X86::BI__builtin_ia32_pmovwb512_mask: {
15912 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15913 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15914 }
15915 case X86::BI__builtin_ia32_pmovdb512_mask:
15916 case X86::BI__builtin_ia32_pmovdw512_mask:
15917 case X86::BI__builtin_ia32_pmovqw512_mask: {
15918 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15919 if (C->isAllOnesValue())
15920 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15921
15922 Intrinsic::ID IID;
15923 switch (BuiltinID) {
15924 default: llvm_unreachable("Unsupported intrinsic!");
15925 case X86::BI__builtin_ia32_pmovdb512_mask:
15926 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15927 break;
15928 case X86::BI__builtin_ia32_pmovdw512_mask:
15929 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15930 break;
15931 case X86::BI__builtin_ia32_pmovqw512_mask:
15932 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15933 break;
15934 }
15935
15936 Function *Intr = CGM.getIntrinsic(IID);
15937 return Builder.CreateCall(Intr, Ops);
15938 }
15939 case X86::BI__builtin_ia32_pblendw128:
15940 case X86::BI__builtin_ia32_blendpd:
15941 case X86::BI__builtin_ia32_blendps:
15942 case X86::BI__builtin_ia32_blendpd256:
15943 case X86::BI__builtin_ia32_blendps256:
15944 case X86::BI__builtin_ia32_pblendw256:
15945 case X86::BI__builtin_ia32_pblendd128:
15946 case X86::BI__builtin_ia32_pblendd256: {
15947 unsigned NumElts =
15948 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15949 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15950
15951 int Indices[16];
15952 // If there are more than 8 elements, the immediate is used twice so make
15953 // sure we handle that.
15954 for (unsigned i = 0; i != NumElts; ++i)
15955 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15956
15957 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15958 ArrayRef(Indices, NumElts), "blend");
15959 }
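// Worked example (editorial, not part of the original source): pblendw on
// <8 x i16> with immediate 0xAA keeps even elements from the first source
// and takes odd elements from the second, producing the shuffle mask
// <0,9,2,11,4,13,6,15>.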
15960 case X86::BI__builtin_ia32_pshuflw:
15961 case X86::BI__builtin_ia32_pshuflw256:
15962 case X86::BI__builtin_ia32_pshuflw512: {
15963 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15964 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15965 unsigned NumElts = Ty->getNumElements();
15966
15967 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15968 Imm = (Imm & 0xff) * 0x01010101;
15969
15970 int Indices[32];
15971 for (unsigned l = 0; l != NumElts; l += 8) {
15972 for (unsigned i = 0; i != 4; ++i) {
15973 Indices[l + i] = l + (Imm & 3);
15974 Imm >>= 2;
15975 }
15976 for (unsigned i = 4; i != 8; ++i)
15977 Indices[l + i] = l + i;
15978 }
15979
15980 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15981 "pshuflw");
15982 }
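// Worked example (editorial, not part of the original source): pshuflw with
// immediate 0x1B reverses the low four words of each 128-bit lane (indices
// 3,2,1,0 per lane) while the high four words pass through unchanged.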
15983 case X86::BI__builtin_ia32_pshufhw:
15984 case X86::BI__builtin_ia32_pshufhw256:
15985 case X86::BI__builtin_ia32_pshufhw512: {
15986 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15987 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15988 unsigned NumElts = Ty->getNumElements();
15989
15990 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15991 Imm = (Imm & 0xff) * 0x01010101;
15992
15993 int Indices[32];
15994 for (unsigned l = 0; l != NumElts; l += 8) {
15995 for (unsigned i = 0; i != 4; ++i)
15996 Indices[l + i] = l + i;
15997 for (unsigned i = 4; i != 8; ++i) {
15998 Indices[l + i] = l + 4 + (Imm & 3);
15999 Imm >>= 2;
16000 }
16001 }
16002
16003 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16004 "pshufhw");
16005 }
16006 case X86::BI__builtin_ia32_pshufd:
16007 case X86::BI__builtin_ia32_pshufd256:
16008 case X86::BI__builtin_ia32_pshufd512:
16009 case X86::BI__builtin_ia32_vpermilpd:
16010 case X86::BI__builtin_ia32_vpermilps:
16011 case X86::BI__builtin_ia32_vpermilpd256:
16012 case X86::BI__builtin_ia32_vpermilps256:
16013 case X86::BI__builtin_ia32_vpermilpd512:
16014 case X86::BI__builtin_ia32_vpermilps512: {
16015 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16016 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16017 unsigned NumElts = Ty->getNumElements();
16018 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16019 unsigned NumLaneElts = NumElts / NumLanes;
16020
16021 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16022 Imm = (Imm & 0xff) * 0x01010101;
16023
16024 int Indices[16];
16025 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16026 for (unsigned i = 0; i != NumLaneElts; ++i) {
16027 Indices[i + l] = (Imm % NumLaneElts) + l;
16028 Imm /= NumLaneElts;
16029 }
16030 }
16031
16032 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16033 "permil");
16034 }
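// Worked example (editorial, not part of the original source): vpermilpd on
// a <4 x double> consumes one selector bit per element (NumLaneElts == 2),
// so an immediate of 0b0101 swaps the pair within each 128-bit lane and
// yields the mask <1,0,3,2>.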
16035 case X86::BI__builtin_ia32_shufpd:
16036 case X86::BI__builtin_ia32_shufpd256:
16037 case X86::BI__builtin_ia32_shufpd512:
16038 case X86::BI__builtin_ia32_shufps:
16039 case X86::BI__builtin_ia32_shufps256:
16040 case X86::BI__builtin_ia32_shufps512: {
16041 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16042 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16043 unsigned NumElts = Ty->getNumElements();
16044 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16045 unsigned NumLaneElts = NumElts / NumLanes;
16046
16047 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16048 Imm = (Imm & 0xff) * 0x01010101;
16049
16050 int Indices[16];
16051 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16052 for (unsigned i = 0; i != NumLaneElts; ++i) {
16053 unsigned Index = Imm % NumLaneElts;
16054 Imm /= NumLaneElts;
16055 if (i >= (NumLaneElts / 2))
16056 Index += NumElts;
16057 Indices[l + i] = l + Index;
16058 }
16059 }
16060
16061 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16062 ArrayRef(Indices, NumElts), "shufp");
16063 }
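// Worked example (editorial, not part of the original source): shufps with
// immediate 0x1B takes elements 3 and 2 from the first source and elements 1
// and 0 from the second, i.e. the shuffle mask is <3,2,5,4>.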
16064 case X86::BI__builtin_ia32_permdi256:
16065 case X86::BI__builtin_ia32_permdf256:
16066 case X86::BI__builtin_ia32_permdi512:
16067 case X86::BI__builtin_ia32_permdf512: {
16068 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16069 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16070 unsigned NumElts = Ty->getNumElements();
16071
16072 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16073 int Indices[8];
16074 for (unsigned l = 0; l != NumElts; l += 4)
16075 for (unsigned i = 0; i != 4; ++i)
16076 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
16077
16078 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16079 "perm");
16080 }
16081 case X86::BI__builtin_ia32_palignr128:
16082 case X86::BI__builtin_ia32_palignr256:
16083 case X86::BI__builtin_ia32_palignr512: {
16084 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16085
16086 unsigned NumElts =
16087 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16088 assert(NumElts % 16 == 0);
16089
16090 // If palignr is shifting the pair of vectors more than the size of two
16091 // lanes, emit zero.
16092 if (ShiftVal >= 32)
16093 return llvm::Constant::getNullValue(ConvertType(E->getType()));
16094
16095 // If palignr is shifting the pair of input vectors more than one lane,
16096 // but less than two lanes, convert to shifting in zeroes.
16097 if (ShiftVal > 16) {
16098 ShiftVal -= 16;
16099 Ops[1] = Ops[0];
16100 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16101 }
16102
16103 int Indices[64];
16104 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16105 for (unsigned l = 0; l != NumElts; l += 16) {
16106 for (unsigned i = 0; i != 16; ++i) {
16107 unsigned Idx = ShiftVal + i;
16108 if (Idx >= 16)
16109 Idx += NumElts - 16; // End of lane, switch operand.
16110 Indices[l + i] = Idx + l;
16111 }
16112 }
16113
16114 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16115 ArrayRef(Indices, NumElts), "palignr");
16116 }
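// Worked example (editorial, not part of the original source): 128-bit
// palignr with a shift of 4 yields indices <4..15,16,17,18,19>, so result
// bytes 0..11 come from bytes 4..15 of Ops[1] (the low half of the
// concatenation) and bytes 12..15 come from bytes 0..3 of Ops[0].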
16117 case X86::BI__builtin_ia32_alignd128:
16118 case X86::BI__builtin_ia32_alignd256:
16119 case X86::BI__builtin_ia32_alignd512:
16120 case X86::BI__builtin_ia32_alignq128:
16121 case X86::BI__builtin_ia32_alignq256:
16122 case X86::BI__builtin_ia32_alignq512: {
16123 unsigned NumElts =
16124 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16125 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16126
16127 // Mask the shift amount to width of a vector.
16128 ShiftVal &= NumElts - 1;
16129
16130 int Indices[16];
16131 for (unsigned i = 0; i != NumElts; ++i)
16132 Indices[i] = i + ShiftVal;
16133
16134 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16135 ArrayRef(Indices, NumElts), "valign");
16136 }
16137 case X86::BI__builtin_ia32_shuf_f32x4_256:
16138 case X86::BI__builtin_ia32_shuf_f64x2_256:
16139 case X86::BI__builtin_ia32_shuf_i32x4_256:
16140 case X86::BI__builtin_ia32_shuf_i64x2_256:
16141 case X86::BI__builtin_ia32_shuf_f32x4:
16142 case X86::BI__builtin_ia32_shuf_f64x2:
16143 case X86::BI__builtin_ia32_shuf_i32x4:
16144 case X86::BI__builtin_ia32_shuf_i64x2: {
16145 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16146 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16147 unsigned NumElts = Ty->getNumElements();
16148 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16149 unsigned NumLaneElts = NumElts / NumLanes;
16150
16151 int Indices[16];
16152 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16153 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16154 Imm /= NumLanes; // Discard the bits we just used.
16155 if (l >= (NumElts / 2))
16156 Index += NumElts; // Switch to other source.
16157 for (unsigned i = 0; i != NumLaneElts; ++i) {
16158 Indices[l + i] = Index + i;
16159 }
16160 }
16161
16162 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16163 ArrayRef(Indices, NumElts), "shuf");
16164 }
16165
16166 case X86::BI__builtin_ia32_vperm2f128_pd256:
16167 case X86::BI__builtin_ia32_vperm2f128_ps256:
16168 case X86::BI__builtin_ia32_vperm2f128_si256:
16169 case X86::BI__builtin_ia32_permti256: {
16170 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16171 unsigned NumElts =
16172 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16173
16174 // This takes a very simple approach since there are two lanes and a
16175 // shuffle can have 2 inputs. So we reserve the first input for the first
16176 // lane and the second input for the second lane. This may result in
16177 // duplicate sources, but this can be dealt with in the backend.
16178
16179 Value *OutOps[2];
16180 int Indices[8];
16181 for (unsigned l = 0; l != 2; ++l) {
16182 // Determine the source for this lane.
16183 if (Imm & (1 << ((l * 4) + 3)))
16184 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16185 else if (Imm & (1 << ((l * 4) + 1)))
16186 OutOps[l] = Ops[1];
16187 else
16188 OutOps[l] = Ops[0];
16189
16190 for (unsigned i = 0; i != NumElts/2; ++i) {
16191 // Start with ith element of the source for this lane.
16192 unsigned Idx = (l * NumElts) + i;
16193 // If bit 0 of the immediate half is set, switch to the high half of
16194 // the source.
16195 if (Imm & (1 << (l * 4)))
16196 Idx += NumElts/2;
16197 Indices[(l * (NumElts/2)) + i] = Idx;
16198 }
16199 }
16200
16201 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16202 ArrayRef(Indices, NumElts), "vperm");
16203 }
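// Worked example (editorial, not part of the original source): vperm2f128
// with immediate 0x31 places the high half of Ops[0] in the low result lane
// (selector nibble 0x1) and the high half of Ops[1] in the high result lane
// (selector nibble 0x3); bit 3 of either nibble would zero that lane instead.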
16204
16205 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16206 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16207 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16208 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16209 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16210 // Builtin type is vXi64 so multiply by 8 to get bytes.
16211 unsigned NumElts = ResultType->getNumElements() * 8;
16212
16213 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16214 if (ShiftVal >= 16)
16215 return llvm::Constant::getNullValue(ResultType);
16216
16217 int Indices[64];
16218 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16219 for (unsigned l = 0; l != NumElts; l += 16) {
16220 for (unsigned i = 0; i != 16; ++i) {
16221 unsigned Idx = NumElts + i - ShiftVal;
16222 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16223 Indices[l + i] = Idx + l;
16224 }
16225 }
16226
16227 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16228 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16229 Value *Zero = llvm::Constant::getNullValue(VecTy);
16230 Value *SV = Builder.CreateShuffleVector(
16231 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16232 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16233 }
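// Worked example (editorial, not part of the original source): a 128-bit
// pslldq by 3 shuffles <Zero, Cast> with indices <13,14,15,16,...,28>,
// producing three zero bytes followed by the low 13 bytes of the source.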
16234 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16235 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16236 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16237 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16238 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16239 // Builtin type is vXi64 so multiply by 8 to get bytes.
16240 unsigned NumElts = ResultType->getNumElements() * 8;
16241
16242 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16243 if (ShiftVal >= 16)
16244 return llvm::Constant::getNullValue(ResultType);
16245
16246 int Indices[64];
16247 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16248 for (unsigned l = 0; l != NumElts; l += 16) {
16249 for (unsigned i = 0; i != 16; ++i) {
16250 unsigned Idx = i + ShiftVal;
16251 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16252 Indices[l + i] = Idx + l;
16253 }
16254 }
16255
16256 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16257 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16258 Value *Zero = llvm::Constant::getNullValue(VecTy);
16259 Value *SV = Builder.CreateShuffleVector(
16260 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16261 return Builder.CreateBitCast(SV, ResultType, "cast");
16262 }
16263 case X86::BI__builtin_ia32_kshiftliqi:
16264 case X86::BI__builtin_ia32_kshiftlihi:
16265 case X86::BI__builtin_ia32_kshiftlisi:
16266 case X86::BI__builtin_ia32_kshiftlidi: {
16267 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16268 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16269
16270 if (ShiftVal >= NumElts)
16271 return llvm::Constant::getNullValue(Ops[0]->getType());
16272
16273 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16274
16275 int Indices[64];
16276 for (unsigned i = 0; i != NumElts; ++i)
16277 Indices[i] = NumElts + i - ShiftVal;
16278
16279 Value *Zero = llvm::Constant::getNullValue(In->getType());
16280 Value *SV = Builder.CreateShuffleVector(
16281 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16282 return Builder.CreateBitCast(SV, Ops[0]->getType());
16283 }
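// Worked example (editorial, not part of the original source): kshiftli on a
// 16-bit mask with a shift of 2 uses indices <14,15,16,...,29> over
// <Zero, In>, i.e. two zero bits followed by bits 0..13 of the input mask.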
16284 case X86::BI__builtin_ia32_kshiftriqi:
16285 case X86::BI__builtin_ia32_kshiftrihi:
16286 case X86::BI__builtin_ia32_kshiftrisi:
16287 case X86::BI__builtin_ia32_kshiftridi: {
16288 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16289 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16290
16291 if (ShiftVal >= NumElts)
16292 return llvm::Constant::getNullValue(Ops[0]->getType());
16293
16294 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16295
16296 int Indices[64];
16297 for (unsigned i = 0; i != NumElts; ++i)
16298 Indices[i] = i + ShiftVal;
16299
16300 Value *Zero = llvm::Constant::getNullValue(In->getType());
16301 Value *SV = Builder.CreateShuffleVector(
16302 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16303 return Builder.CreateBitCast(SV, Ops[0]->getType());
16304 }
16305 case X86::BI__builtin_ia32_movnti:
16306 case X86::BI__builtin_ia32_movnti64:
16307 case X86::BI__builtin_ia32_movntsd:
16308 case X86::BI__builtin_ia32_movntss: {
16309 llvm::MDNode *Node = llvm::MDNode::get(
16310 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16311
16312 Value *Ptr = Ops[0];
16313 Value *Src = Ops[1];
16314
16315 // Extract the 0'th element of the source vector.
16316 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16317 BuiltinID == X86::BI__builtin_ia32_movntss)
16318 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16319
16320 // Unaligned nontemporal store of the scalar value.
16321 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16322 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16323 SI->setAlignment(llvm::Align(1));
16324 return SI;
16325 }
16326 // Rotate is a special case of funnel shift: the first two args are the same.
16327 case X86::BI__builtin_ia32_vprotb:
16328 case X86::BI__builtin_ia32_vprotw:
16329 case X86::BI__builtin_ia32_vprotd:
16330 case X86::BI__builtin_ia32_vprotq:
16331 case X86::BI__builtin_ia32_vprotbi:
16332 case X86::BI__builtin_ia32_vprotwi:
16333 case X86::BI__builtin_ia32_vprotdi:
16334 case X86::BI__builtin_ia32_vprotqi:
16335 case X86::BI__builtin_ia32_prold128:
16336 case X86::BI__builtin_ia32_prold256:
16337 case X86::BI__builtin_ia32_prold512:
16338 case X86::BI__builtin_ia32_prolq128:
16339 case X86::BI__builtin_ia32_prolq256:
16340 case X86::BI__builtin_ia32_prolq512:
16341 case X86::BI__builtin_ia32_prolvd128:
16342 case X86::BI__builtin_ia32_prolvd256:
16343 case X86::BI__builtin_ia32_prolvd512:
16344 case X86::BI__builtin_ia32_prolvq128:
16345 case X86::BI__builtin_ia32_prolvq256:
16346 case X86::BI__builtin_ia32_prolvq512:
16347 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16348 case X86::BI__builtin_ia32_prord128:
16349 case X86::BI__builtin_ia32_prord256:
16350 case X86::BI__builtin_ia32_prord512:
16351 case X86::BI__builtin_ia32_prorq128:
16352 case X86::BI__builtin_ia32_prorq256:
16353 case X86::BI__builtin_ia32_prorq512:
16354 case X86::BI__builtin_ia32_prorvd128:
16355 case X86::BI__builtin_ia32_prorvd256:
16356 case X86::BI__builtin_ia32_prorvd512:
16357 case X86::BI__builtin_ia32_prorvq128:
16358 case X86::BI__builtin_ia32_prorvq256:
16359 case X86::BI__builtin_ia32_prorvq512:
16360 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16361 case X86::BI__builtin_ia32_selectb_128:
16362 case X86::BI__builtin_ia32_selectb_256:
16363 case X86::BI__builtin_ia32_selectb_512:
16364 case X86::BI__builtin_ia32_selectw_128:
16365 case X86::BI__builtin_ia32_selectw_256:
16366 case X86::BI__builtin_ia32_selectw_512:
16367 case X86::BI__builtin_ia32_selectd_128:
16368 case X86::BI__builtin_ia32_selectd_256:
16369 case X86::BI__builtin_ia32_selectd_512:
16370 case X86::BI__builtin_ia32_selectq_128:
16371 case X86::BI__builtin_ia32_selectq_256:
16372 case X86::BI__builtin_ia32_selectq_512:
16373 case X86::BI__builtin_ia32_selectph_128:
16374 case X86::BI__builtin_ia32_selectph_256:
16375 case X86::BI__builtin_ia32_selectph_512:
16376 case X86::BI__builtin_ia32_selectpbf_128:
16377 case X86::BI__builtin_ia32_selectpbf_256:
16378 case X86::BI__builtin_ia32_selectpbf_512:
16379 case X86::BI__builtin_ia32_selectps_128:
16380 case X86::BI__builtin_ia32_selectps_256:
16381 case X86::BI__builtin_ia32_selectps_512:
16382 case X86::BI__builtin_ia32_selectpd_128:
16383 case X86::BI__builtin_ia32_selectpd_256:
16384 case X86::BI__builtin_ia32_selectpd_512:
16385 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16386 case X86::BI__builtin_ia32_selectsh_128:
16387 case X86::BI__builtin_ia32_selectsbf_128:
16388 case X86::BI__builtin_ia32_selectss_128:
16389 case X86::BI__builtin_ia32_selectsd_128: {
16390 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16391 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16392 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16393 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16394 }
16395 case X86::BI__builtin_ia32_cmpb128_mask:
16396 case X86::BI__builtin_ia32_cmpb256_mask:
16397 case X86::BI__builtin_ia32_cmpb512_mask:
16398 case X86::BI__builtin_ia32_cmpw128_mask:
16399 case X86::BI__builtin_ia32_cmpw256_mask:
16400 case X86::BI__builtin_ia32_cmpw512_mask:
16401 case X86::BI__builtin_ia32_cmpd128_mask:
16402 case X86::BI__builtin_ia32_cmpd256_mask:
16403 case X86::BI__builtin_ia32_cmpd512_mask:
16404 case X86::BI__builtin_ia32_cmpq128_mask:
16405 case X86::BI__builtin_ia32_cmpq256_mask:
16406 case X86::BI__builtin_ia32_cmpq512_mask: {
16407 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16408 return EmitX86MaskedCompare(*this, CC, true, Ops);
16409 }
16410 case X86::BI__builtin_ia32_ucmpb128_mask:
16411 case X86::BI__builtin_ia32_ucmpb256_mask:
16412 case X86::BI__builtin_ia32_ucmpb512_mask:
16413 case X86::BI__builtin_ia32_ucmpw128_mask:
16414 case X86::BI__builtin_ia32_ucmpw256_mask:
16415 case X86::BI__builtin_ia32_ucmpw512_mask:
16416 case X86::BI__builtin_ia32_ucmpd128_mask:
16417 case X86::BI__builtin_ia32_ucmpd256_mask:
16418 case X86::BI__builtin_ia32_ucmpd512_mask:
16419 case X86::BI__builtin_ia32_ucmpq128_mask:
16420 case X86::BI__builtin_ia32_ucmpq256_mask:
16421 case X86::BI__builtin_ia32_ucmpq512_mask: {
16422 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16423 return EmitX86MaskedCompare(*this, CC, false, Ops);
16424 }
16425 case X86::BI__builtin_ia32_vpcomb:
16426 case X86::BI__builtin_ia32_vpcomw:
16427 case X86::BI__builtin_ia32_vpcomd:
16428 case X86::BI__builtin_ia32_vpcomq:
16429 return EmitX86vpcom(*this, Ops, true);
16430 case X86::BI__builtin_ia32_vpcomub:
16431 case X86::BI__builtin_ia32_vpcomuw:
16432 case X86::BI__builtin_ia32_vpcomud:
16433 case X86::BI__builtin_ia32_vpcomuq:
16434 return EmitX86vpcom(*this, Ops, false);
16435
16436 case X86::BI__builtin_ia32_kortestcqi:
16437 case X86::BI__builtin_ia32_kortestchi:
16438 case X86::BI__builtin_ia32_kortestcsi:
16439 case X86::BI__builtin_ia32_kortestcdi: {
16440 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16441 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16442 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16443 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16444 }
16445 case X86::BI__builtin_ia32_kortestzqi:
16446 case X86::BI__builtin_ia32_kortestzhi:
16447 case X86::BI__builtin_ia32_kortestzsi:
16448 case X86::BI__builtin_ia32_kortestzdi: {
16449 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16450 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16451 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16452 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16453 }
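// For illustration (wrapper names assumed from the mask intrinsics):
// kortestc returns 1 iff (A | B) has all mask bits set, and kortestz
// returns 1 iff (A | B) == 0, e.g.
//   _kortestz_mask16_u8(0x00F0, 0x0F00) -> 0   (OR is 0x0FF0, nonzero)
//   _kortestc_mask16_u8(0xFF0F, 0x00F0) -> 1   (OR is 0xFFFF, all ones)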
16454
16455 case X86::BI__builtin_ia32_ktestcqi:
16456 case X86::BI__builtin_ia32_ktestzqi:
16457 case X86::BI__builtin_ia32_ktestchi:
16458 case X86::BI__builtin_ia32_ktestzhi:
16459 case X86::BI__builtin_ia32_ktestcsi:
16460 case X86::BI__builtin_ia32_ktestzsi:
16461 case X86::BI__builtin_ia32_ktestcdi:
16462 case X86::BI__builtin_ia32_ktestzdi: {
16463 Intrinsic::ID IID;
16464 switch (BuiltinID) {
16465 default: llvm_unreachable("Unsupported intrinsic!");
16466 case X86::BI__builtin_ia32_ktestcqi:
16467 IID = Intrinsic::x86_avx512_ktestc_b;
16468 break;
16469 case X86::BI__builtin_ia32_ktestzqi:
16470 IID = Intrinsic::x86_avx512_ktestz_b;
16471 break;
16472 case X86::BI__builtin_ia32_ktestchi:
16473 IID = Intrinsic::x86_avx512_ktestc_w;
16474 break;
16475 case X86::BI__builtin_ia32_ktestzhi:
16476 IID = Intrinsic::x86_avx512_ktestz_w;
16477 break;
16478 case X86::BI__builtin_ia32_ktestcsi:
16479 IID = Intrinsic::x86_avx512_ktestc_d;
16480 break;
16481 case X86::BI__builtin_ia32_ktestzsi:
16482 IID = Intrinsic::x86_avx512_ktestz_d;
16483 break;
16484 case X86::BI__builtin_ia32_ktestcdi:
16485 IID = Intrinsic::x86_avx512_ktestc_q;
16486 break;
16487 case X86::BI__builtin_ia32_ktestzdi:
16488 IID = Intrinsic::x86_avx512_ktestz_q;
16489 break;
16490 }
16491
16492 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16493 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16494 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16495 Function *Intr = CGM.getIntrinsic(IID);
16496 return Builder.CreateCall(Intr, {LHS, RHS});
16497 }
16498
16499 case X86::BI__builtin_ia32_kaddqi:
16500 case X86::BI__builtin_ia32_kaddhi:
16501 case X86::BI__builtin_ia32_kaddsi:
16502 case X86::BI__builtin_ia32_kadddi: {
16503 Intrinsic::ID IID;
16504 switch (BuiltinID) {
16505 default: llvm_unreachable("Unsupported intrinsic!");
16506 case X86::BI__builtin_ia32_kaddqi:
16507 IID = Intrinsic::x86_avx512_kadd_b;
16508 break;
16509 case X86::BI__builtin_ia32_kaddhi:
16510 IID = Intrinsic::x86_avx512_kadd_w;
16511 break;
16512 case X86::BI__builtin_ia32_kaddsi:
16513 IID = Intrinsic::x86_avx512_kadd_d;
16514 break;
16515 case X86::BI__builtin_ia32_kadddi:
16516 IID = Intrinsic::x86_avx512_kadd_q;
16517 break;
16518 }
16519
16520 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16521 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16522 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16523 Function *Intr = CGM.getIntrinsic(IID);
16524 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16525 return Builder.CreateBitCast(Res, Ops[0]->getType());
16526 }
16527 case X86::BI__builtin_ia32_kandqi:
16528 case X86::BI__builtin_ia32_kandhi:
16529 case X86::BI__builtin_ia32_kandsi:
16530 case X86::BI__builtin_ia32_kanddi:
16531 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16532 case X86::BI__builtin_ia32_kandnqi:
16533 case X86::BI__builtin_ia32_kandnhi:
16534 case X86::BI__builtin_ia32_kandnsi:
16535 case X86::BI__builtin_ia32_kandndi:
16536 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16537 case X86::BI__builtin_ia32_korqi:
16538 case X86::BI__builtin_ia32_korhi:
16539 case X86::BI__builtin_ia32_korsi:
16540 case X86::BI__builtin_ia32_kordi:
16541 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16542 case X86::BI__builtin_ia32_kxnorqi:
16543 case X86::BI__builtin_ia32_kxnorhi:
16544 case X86::BI__builtin_ia32_kxnorsi:
16545 case X86::BI__builtin_ia32_kxnordi:
16546 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16547 case X86::BI__builtin_ia32_kxorqi:
16548 case X86::BI__builtin_ia32_kxorhi:
16549 case X86::BI__builtin_ia32_kxorsi:
16550 case X86::BI__builtin_ia32_kxordi:
16551 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16552 case X86::BI__builtin_ia32_knotqi:
16553 case X86::BI__builtin_ia32_knothi:
16554 case X86::BI__builtin_ia32_knotsi:
16555 case X86::BI__builtin_ia32_knotdi: {
16556 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16557 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16558 return Builder.CreateBitCast(Builder.CreateNot(Res),
16559 Ops[0]->getType());
16560 }
16561 case X86::BI__builtin_ia32_kmovb:
16562 case X86::BI__builtin_ia32_kmovw:
16563 case X86::BI__builtin_ia32_kmovd:
16564 case X86::BI__builtin_ia32_kmovq: {
16565 // Bitcast to vXi1 type and then back to integer. This gets the mask
16566 // register type into the IR, but might be optimized out depending on
16567 // what's around it.
16568 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16569 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16570 return Builder.CreateBitCast(Res, Ops[0]->getType());
16571 }
16572
16573 case X86::BI__builtin_ia32_kunpckdi:
16574 case X86::BI__builtin_ia32_kunpcksi:
16575 case X86::BI__builtin_ia32_kunpckhi: {
16576 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16577 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16578 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16579 int Indices[64];
16580 for (unsigned i = 0; i != NumElts; ++i)
16581 Indices[i] = i;
16582
16583 // First extract half of each vector. This gives better codegen than
16584 // doing it in a single shuffle.
16585 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16586 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16587 // Concat the vectors.
16588 // NOTE: Operands are swapped to match the intrinsic definition.
16589 Value *Res =
16590 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16591 return Builder.CreateBitCast(Res, Ops[0]->getType());
16592 }
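// Concrete illustration: for kunpckbw the mask type is 16 bits wide, so
// NumElts == 16. Each operand is first narrowed to its low 8 elements
// (indices 0..7), and the final 16-element shuffle places the low half of
// Ops[1] in elements 0..7 and the low half of Ops[0] in elements 8..15,
// which is the swap the NOTE above refers to.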
16593
16594 case X86::BI__builtin_ia32_vplzcntd_128:
16595 case X86::BI__builtin_ia32_vplzcntd_256:
16596 case X86::BI__builtin_ia32_vplzcntd_512:
16597 case X86::BI__builtin_ia32_vplzcntq_128:
16598 case X86::BI__builtin_ia32_vplzcntq_256:
16599 case X86::BI__builtin_ia32_vplzcntq_512: {
16600 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16601 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16602 }
16603 case X86::BI__builtin_ia32_sqrtss:
16604 case X86::BI__builtin_ia32_sqrtsd: {
16605 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16606 Function *F;
16607 if (Builder.getIsFPConstrained()) {
16608 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16609 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16610 A->getType());
16611 A = Builder.CreateConstrainedFPCall(F, {A});
16612 } else {
16613 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16614 A = Builder.CreateCall(F, {A});
16615 }
16616 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16617 }
16618 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16619 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16620 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16621 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16622 // Only lower to a plain sqrt when the rounding mode is 4 (AKA
16623 // CUR_DIRECTION); otherwise keep the target-specific intrinsic.
16624 if (CC != 4) {
16625 Intrinsic::ID IID;
16626
16627 switch (BuiltinID) {
16628 default:
16629 llvm_unreachable("Unsupported intrinsic!");
16630 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16631 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16632 break;
16633 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16634 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16635 break;
16636 case X86::BI__builtin_ia32_sqrtss_round_mask:
16637 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16638 break;
16639 }
16640 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16641 }
16642 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16643 Function *F;
16644 if (Builder.getIsFPConstrained()) {
16645 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16646 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16647 A->getType());
16648 A = Builder.CreateConstrainedFPCall(F, A);
16649 } else {
16650 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16651 A = Builder.CreateCall(F, A);
16652 }
16653 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16654 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16655 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16656 }
16657 case X86::BI__builtin_ia32_sqrtpd256:
16658 case X86::BI__builtin_ia32_sqrtpd:
16659 case X86::BI__builtin_ia32_sqrtps256:
16660 case X86::BI__builtin_ia32_sqrtps:
16661 case X86::BI__builtin_ia32_sqrtph256:
16662 case X86::BI__builtin_ia32_sqrtph:
16663 case X86::BI__builtin_ia32_sqrtph512:
16664 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16665 case X86::BI__builtin_ia32_vsqrtnepbf16:
16666 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16667 case X86::BI__builtin_ia32_sqrtps512:
16668 case X86::BI__builtin_ia32_sqrtpd512: {
16669 if (Ops.size() == 2) {
16670 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16671 // Only lower to a plain sqrt when the rounding mode is 4 (AKA
16672 // CUR_DIRECTION); otherwise keep the target-specific intrinsic.
16673 if (CC != 4) {
16674 Intrinsic::ID IID;
16675
16676 switch (BuiltinID) {
16677 default:
16678 llvm_unreachable("Unsupported intrinsic!");
16679 case X86::BI__builtin_ia32_sqrtph512:
16680 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16681 break;
16682 case X86::BI__builtin_ia32_sqrtps512:
16683 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16684 break;
16685 case X86::BI__builtin_ia32_sqrtpd512:
16686 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16687 break;
16688 }
16689 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16690 }
16691 }
16692 if (Builder.getIsFPConstrained()) {
16693 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16694 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16695 Ops[0]->getType());
16696 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16697 } else {
16698 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16699 return Builder.CreateCall(F, Ops[0]);
16700 }
16701 }
16702
16703 case X86::BI__builtin_ia32_pmuludq128:
16704 case X86::BI__builtin_ia32_pmuludq256:
16705 case X86::BI__builtin_ia32_pmuludq512:
16706 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16707
16708 case X86::BI__builtin_ia32_pmuldq128:
16709 case X86::BI__builtin_ia32_pmuldq256:
16710 case X86::BI__builtin_ia32_pmuldq512:
16711 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16712
16713 case X86::BI__builtin_ia32_pternlogd512_mask:
16714 case X86::BI__builtin_ia32_pternlogq512_mask:
16715 case X86::BI__builtin_ia32_pternlogd128_mask:
16716 case X86::BI__builtin_ia32_pternlogd256_mask:
16717 case X86::BI__builtin_ia32_pternlogq128_mask:
16718 case X86::BI__builtin_ia32_pternlogq256_mask:
16719 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16720
16721 case X86::BI__builtin_ia32_pternlogd512_maskz:
16722 case X86::BI__builtin_ia32_pternlogq512_maskz:
16723 case X86::BI__builtin_ia32_pternlogd128_maskz:
16724 case X86::BI__builtin_ia32_pternlogd256_maskz:
16725 case X86::BI__builtin_ia32_pternlogq128_maskz:
16726 case X86::BI__builtin_ia32_pternlogq256_maskz:
16727 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16728
16729 case X86::BI__builtin_ia32_vpshldd128:
16730 case X86::BI__builtin_ia32_vpshldd256:
16731 case X86::BI__builtin_ia32_vpshldd512:
16732 case X86::BI__builtin_ia32_vpshldq128:
16733 case X86::BI__builtin_ia32_vpshldq256:
16734 case X86::BI__builtin_ia32_vpshldq512:
16735 case X86::BI__builtin_ia32_vpshldw128:
16736 case X86::BI__builtin_ia32_vpshldw256:
16737 case X86::BI__builtin_ia32_vpshldw512:
16738 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16739
16740 case X86::BI__builtin_ia32_vpshrdd128:
16741 case X86::BI__builtin_ia32_vpshrdd256:
16742 case X86::BI__builtin_ia32_vpshrdd512:
16743 case X86::BI__builtin_ia32_vpshrdq128:
16744 case X86::BI__builtin_ia32_vpshrdq256:
16745 case X86::BI__builtin_ia32_vpshrdq512:
16746 case X86::BI__builtin_ia32_vpshrdw128:
16747 case X86::BI__builtin_ia32_vpshrdw256:
16748 case X86::BI__builtin_ia32_vpshrdw512:
16749 // Ops 0 and 1 are swapped.
16750 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16751
16752 case X86::BI__builtin_ia32_vpshldvd128:
16753 case X86::BI__builtin_ia32_vpshldvd256:
16754 case X86::BI__builtin_ia32_vpshldvd512:
16755 case X86::BI__builtin_ia32_vpshldvq128:
16756 case X86::BI__builtin_ia32_vpshldvq256:
16757 case X86::BI__builtin_ia32_vpshldvq512:
16758 case X86::BI__builtin_ia32_vpshldvw128:
16759 case X86::BI__builtin_ia32_vpshldvw256:
16760 case X86::BI__builtin_ia32_vpshldvw512:
16761 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16762
16763 case X86::BI__builtin_ia32_vpshrdvd128:
16764 case X86::BI__builtin_ia32_vpshrdvd256:
16765 case X86::BI__builtin_ia32_vpshrdvd512:
16766 case X86::BI__builtin_ia32_vpshrdvq128:
16767 case X86::BI__builtin_ia32_vpshrdvq256:
16768 case X86::BI__builtin_ia32_vpshrdvq512:
16769 case X86::BI__builtin_ia32_vpshrdvw128:
16770 case X86::BI__builtin_ia32_vpshrdvw256:
16771 case X86::BI__builtin_ia32_vpshrdvw512:
16772 // Ops 0 and 1 are swapped.
16773 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16774
16775 // Reductions
16776 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16777 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16778 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16779 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16780 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16781 Function *F =
16782 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16783 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16784 Builder.getFastMathFlags().setAllowReassoc();
16785 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16786 }
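// Minimal sketch of the resulting IR (value names hypothetical): with the
// reassoc flag set the ordered reduction may be lowered as a tree, e.g.
//   %sum = call reassoc float @llvm.vector.reduce.fadd.v16f32(
//              float %acc, <16 x float> %v)
// where %acc is the accumulator operand forwarded from the header wrapper.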
16787 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16788 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16789 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16790 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16791 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16792 Function *F =
16793 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16794 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16795 Builder.getFastMathFlags().setAllowReassoc();
16796 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16797 }
16798 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16799 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16800 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16801 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16802 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16803 Function *F =
16804 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16805 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16806 Builder.getFastMathFlags().setNoNaNs();
16807 return Builder.CreateCall(F, {Ops[0]});
16808 }
16809 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16810 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16811 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16812 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16813 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16814 Function *F =
16815 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16816 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16817 Builder.getFastMathFlags().setNoNaNs();
16818 return Builder.CreateCall(F, {Ops[0]});
16819 }
16820
16821 case X86::BI__builtin_ia32_rdrand16_step:
16822 case X86::BI__builtin_ia32_rdrand32_step:
16823 case X86::BI__builtin_ia32_rdrand64_step:
16824 case X86::BI__builtin_ia32_rdseed16_step:
16825 case X86::BI__builtin_ia32_rdseed32_step:
16826 case X86::BI__builtin_ia32_rdseed64_step: {
16827 Intrinsic::ID ID;
16828 switch (BuiltinID) {
16829 default: llvm_unreachable("Unsupported intrinsic!");
16830 case X86::BI__builtin_ia32_rdrand16_step:
16831 ID = Intrinsic::x86_rdrand_16;
16832 break;
16833 case X86::BI__builtin_ia32_rdrand32_step:
16834 ID = Intrinsic::x86_rdrand_32;
16835 break;
16836 case X86::BI__builtin_ia32_rdrand64_step:
16837 ID = Intrinsic::x86_rdrand_64;
16838 break;
16839 case X86::BI__builtin_ia32_rdseed16_step:
16840 ID = Intrinsic::x86_rdseed_16;
16841 break;
16842 case X86::BI__builtin_ia32_rdseed32_step:
16843 ID = Intrinsic::x86_rdseed_32;
16844 break;
16845 case X86::BI__builtin_ia32_rdseed64_step:
16846 ID = Intrinsic::x86_rdseed_64;
16847 break;
16848 }
16849
16850 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16851 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16852 Ops[0]);
16853 return Builder.CreateExtractValue(Call, 1);
16854 }
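// Usage sketch (caller-side code, values hypothetical):
//   unsigned int r;
//   if (__builtin_ia32_rdrand32_step(&r))
//     use(r);  // success bit was set by the hardware
// The random value is stored through the pointer argument and the
// extracted success flag becomes the builtin's return value.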
16855 case X86::BI__builtin_ia32_addcarryx_u32:
16856 case X86::BI__builtin_ia32_addcarryx_u64:
16857 case X86::BI__builtin_ia32_subborrow_u32:
16858 case X86::BI__builtin_ia32_subborrow_u64: {
16859 Intrinsic::ID IID;
16860 switch (BuiltinID) {
16861 default: llvm_unreachable("Unsupported intrinsic!");
16862 case X86::BI__builtin_ia32_addcarryx_u32:
16863 IID = Intrinsic::x86_addcarry_32;
16864 break;
16865 case X86::BI__builtin_ia32_addcarryx_u64:
16866 IID = Intrinsic::x86_addcarry_64;
16867 break;
16868 case X86::BI__builtin_ia32_subborrow_u32:
16869 IID = Intrinsic::x86_subborrow_32;
16870 break;
16871 case X86::BI__builtin_ia32_subborrow_u64:
16872 IID = Intrinsic::x86_subborrow_64;
16873 break;
16874 }
16875
16876 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16877 { Ops[0], Ops[1], Ops[2] });
16878 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16879 Ops[3]);
16880 return Builder.CreateExtractValue(Call, 0);
16881 }
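// Usage sketch (caller-side code, operand names hypothetical): chaining
// two 64-bit limbs,
//   unsigned long long lo, hi;
//   unsigned char c = __builtin_ia32_addcarryx_u64(0, a0, b0, &lo);
//   __builtin_ia32_addcarryx_u64(c, a1, b1, &hi);
// The intrinsic's struct result is {carry-out, sum}: the sum is stored
// through the last argument and the carry-out is returned.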
16882
16883 case X86::BI__builtin_ia32_fpclassps128_mask:
16884 case X86::BI__builtin_ia32_fpclassps256_mask:
16885 case X86::BI__builtin_ia32_fpclassps512_mask:
16886 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16887 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16888 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16889 case X86::BI__builtin_ia32_fpclassph128_mask:
16890 case X86::BI__builtin_ia32_fpclassph256_mask:
16891 case X86::BI__builtin_ia32_fpclassph512_mask:
16892 case X86::BI__builtin_ia32_fpclasspd128_mask:
16893 case X86::BI__builtin_ia32_fpclasspd256_mask:
16894 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16895 unsigned NumElts =
16896 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16897 Value *MaskIn = Ops[2];
16898 Ops.erase(&Ops[2]);
16899
16900 Intrinsic::ID ID;
16901 switch (BuiltinID) {
16902 default: llvm_unreachable("Unsupported intrinsic!");
16903 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16904 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16905 break;
16906 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16907 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16908 break;
16909 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16910 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16911 break;
16912 case X86::BI__builtin_ia32_fpclassph128_mask:
16913 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16914 break;
16915 case X86::BI__builtin_ia32_fpclassph256_mask:
16916 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16917 break;
16918 case X86::BI__builtin_ia32_fpclassph512_mask:
16919 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16920 break;
16921 case X86::BI__builtin_ia32_fpclassps128_mask:
16922 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16923 break;
16924 case X86::BI__builtin_ia32_fpclassps256_mask:
16925 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16926 break;
16927 case X86::BI__builtin_ia32_fpclassps512_mask:
16928 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16929 break;
16930 case X86::BI__builtin_ia32_fpclasspd128_mask:
16931 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16932 break;
16933 case X86::BI__builtin_ia32_fpclasspd256_mask:
16934 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16935 break;
16936 case X86::BI__builtin_ia32_fpclasspd512_mask:
16937 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16938 break;
16939 }
16940
16941 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16942 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16943 }
16944
16945 case X86::BI__builtin_ia32_vp2intersect_q_512:
16946 case X86::BI__builtin_ia32_vp2intersect_q_256:
16947 case X86::BI__builtin_ia32_vp2intersect_q_128:
16948 case X86::BI__builtin_ia32_vp2intersect_d_512:
16949 case X86::BI__builtin_ia32_vp2intersect_d_256:
16950 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16951 unsigned NumElts =
16952 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16953 Intrinsic::ID ID;
16954
16955 switch (BuiltinID) {
16956 default: llvm_unreachable("Unsupported intrinsic!");
16957 case X86::BI__builtin_ia32_vp2intersect_q_512:
16958 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16959 break;
16960 case X86::BI__builtin_ia32_vp2intersect_q_256:
16961 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16962 break;
16963 case X86::BI__builtin_ia32_vp2intersect_q_128:
16964 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16965 break;
16966 case X86::BI__builtin_ia32_vp2intersect_d_512:
16967 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16968 break;
16969 case X86::BI__builtin_ia32_vp2intersect_d_256:
16970 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16971 break;
16972 case X86::BI__builtin_ia32_vp2intersect_d_128:
16973 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16974 break;
16975 }
16976
16977 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16978 Value *Result = Builder.CreateExtractValue(Call, 0);
16979 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16980 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16981
16982 Result = Builder.CreateExtractValue(Call, 1);
16983 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16984 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16985 }
16986
16987 case X86::BI__builtin_ia32_vpmultishiftqb128:
16988 case X86::BI__builtin_ia32_vpmultishiftqb256:
16989 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16990 Intrinsic::ID ID;
16991 switch (BuiltinID) {
16992 default: llvm_unreachable("Unsupported intrinsic!");
16993 case X86::BI__builtin_ia32_vpmultishiftqb128:
16994 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16995 break;
16996 case X86::BI__builtin_ia32_vpmultishiftqb256:
16997 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16998 break;
16999 case X86::BI__builtin_ia32_vpmultishiftqb512:
17000 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
17001 break;
17002 }
17003
17004 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17005 }
17006
17007 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17008 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17009 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
17010 unsigned NumElts =
17011 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17012 Value *MaskIn = Ops[2];
17013 Ops.erase(&Ops[2]);
17014
17015 Intrinsic::ID ID;
17016 switch (BuiltinID) {
17017 default: llvm_unreachable("Unsupported intrinsic!");
17018 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17019 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
17020 break;
17021 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17022 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
17023 break;
17024 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
17025 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
17026 break;
17027 }
17028
17029 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17030 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
17031 }
17032
17033 // packed comparison intrinsics
17034 case X86::BI__builtin_ia32_cmpeqps:
17035 case X86::BI__builtin_ia32_cmpeqpd:
17036 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
17037 case X86::BI__builtin_ia32_cmpltps:
17038 case X86::BI__builtin_ia32_cmpltpd:
17039 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
17040 case X86::BI__builtin_ia32_cmpleps:
17041 case X86::BI__builtin_ia32_cmplepd:
17042 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
17043 case X86::BI__builtin_ia32_cmpunordps:
17044 case X86::BI__builtin_ia32_cmpunordpd:
17045 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
17046 case X86::BI__builtin_ia32_cmpneqps:
17047 case X86::BI__builtin_ia32_cmpneqpd:
17048 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
17049 case X86::BI__builtin_ia32_cmpnltps:
17050 case X86::BI__builtin_ia32_cmpnltpd:
17051 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
17052 case X86::BI__builtin_ia32_cmpnleps:
17053 case X86::BI__builtin_ia32_cmpnlepd:
17054 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
17055 case X86::BI__builtin_ia32_cmpordps:
17056 case X86::BI__builtin_ia32_cmpordpd:
17057 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
17058 case X86::BI__builtin_ia32_cmpph128_mask:
17059 case X86::BI__builtin_ia32_cmpph256_mask:
17060 case X86::BI__builtin_ia32_cmpph512_mask:
17061 case X86::BI__builtin_ia32_cmpps128_mask:
17062 case X86::BI__builtin_ia32_cmpps256_mask:
17063 case X86::BI__builtin_ia32_cmpps512_mask:
17064 case X86::BI__builtin_ia32_cmppd128_mask:
17065 case X86::BI__builtin_ia32_cmppd256_mask:
17066 case X86::BI__builtin_ia32_cmppd512_mask:
17067 case X86::BI__builtin_ia32_vcmppd256_round_mask:
17068 case X86::BI__builtin_ia32_vcmpps256_round_mask:
17069 case X86::BI__builtin_ia32_vcmpph256_round_mask:
17070 case X86::BI__builtin_ia32_vcmppbf16512_mask:
17071 case X86::BI__builtin_ia32_vcmppbf16256_mask:
17072 case X86::BI__builtin_ia32_vcmppbf16128_mask:
17073 IsMaskFCmp = true;
17074 [[fallthrough]];
17075 case X86::BI__builtin_ia32_cmpps:
17076 case X86::BI__builtin_ia32_cmpps256:
17077 case X86::BI__builtin_ia32_cmppd:
17078 case X86::BI__builtin_ia32_cmppd256: {
17079 // Lower vector comparisons to fcmp instructions, ignoring both the
17080 // requested signalling behaviour and the requested rounding mode.
17081 // This is only possible if the fp-model is not strict and FENV_ACCESS
17082 // is off.
17083
17084 // The third argument is the comparison condition, an integer in the
17085 // range [0, 31].
17086 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
17087
17088 // Lowering to IR fcmp instruction.
17089 // Ignoring requested signaling behaviour,
17090 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
17091 FCmpInst::Predicate Pred;
17092 bool IsSignaling;
17093 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17094 // behavior is inverted. We'll handle that after the switch.
17095 switch (CC & 0xf) {
17096 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
17097 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
17098 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
17099 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
17100 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
17101 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
17102 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
17103 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
17104 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17105 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17106 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17107 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17108 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17109 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17110 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17111 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17112 default: llvm_unreachable("Unhandled CC");
17113 }
17114
17115 // Invert the signalling behavior for 16-31.
17116 if (CC & 0x10)
17117 IsSignaling = !IsSignaling;
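// Worked example (assuming the usual _CMP_* immediates): _CMP_GT_OS is
// 0x0e and _CMP_GT_OQ is 0x1e. Both hit the FCMP_OGT row above; only
// bit 4 differs, so the OQ form ends up non-signaling while the OS form
// stays signaling.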
17118
17119 // If the predicate is true or false and we're using constrained intrinsics,
17120 // we don't have a compare intrinsic we can use. Just use the legacy X86
17121 // specific intrinsic.
17122 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17123 // use the legacy X86 specific intrinsic.
17124 if (Builder.getIsFPConstrained() &&
17125 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17126 IsMaskFCmp)) {
17127
17128 Intrinsic::ID IID;
17129 switch (BuiltinID) {
17130 default: llvm_unreachable("Unexpected builtin");
17131 case X86::BI__builtin_ia32_cmpps:
17132 IID = Intrinsic::x86_sse_cmp_ps;
17133 break;
17134 case X86::BI__builtin_ia32_cmpps256:
17135 IID = Intrinsic::x86_avx_cmp_ps_256;
17136 break;
17137 case X86::BI__builtin_ia32_cmppd:
17138 IID = Intrinsic::x86_sse2_cmp_pd;
17139 break;
17140 case X86::BI__builtin_ia32_cmppd256:
17141 IID = Intrinsic::x86_avx_cmp_pd_256;
17142 break;
17143 case X86::BI__builtin_ia32_cmpph128_mask:
17144 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17145 break;
17146 case X86::BI__builtin_ia32_cmpph256_mask:
17147 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17148 break;
17149 case X86::BI__builtin_ia32_cmpph512_mask:
17150 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17151 break;
17152 case X86::BI__builtin_ia32_cmpps512_mask:
17153 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17154 break;
17155 case X86::BI__builtin_ia32_cmppd512_mask:
17156 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17157 break;
17158 case X86::BI__builtin_ia32_cmpps128_mask:
17159 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17160 break;
17161 case X86::BI__builtin_ia32_cmpps256_mask:
17162 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17163 break;
17164 case X86::BI__builtin_ia32_cmppd128_mask:
17165 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17166 break;
17167 case X86::BI__builtin_ia32_cmppd256_mask:
17168 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17169 break;
17170 }
17171
17172 Function *Intr = CGM.getIntrinsic(IID);
17173 if (IsMaskFCmp) {
17174 unsigned NumElts =
17175 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17176 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17177 Value *Cmp = Builder.CreateCall(Intr, Ops);
17178 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17179 }
17180
17181 return Builder.CreateCall(Intr, Ops);
17182 }
17183
17184 // Builtins without the _mask suffix return a vector of integers
17185 // of the same width as the input vectors
17186 if (IsMaskFCmp) {
17187 // We ignore SAE if strict FP is disabled. We only keep precise
17188 // exception behavior under strict FP.
17189 // NOTE: If strict FP ever goes through here, a CGFPOptionsRAII
17190 // object will be required.
17191 unsigned NumElts =
17192 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17193 Value *Cmp;
17194 if (IsSignaling)
17195 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17196 else
17197 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17198 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17199 }
17200
17201 return getVectorFCmpIR(Pred, IsSignaling);
17202 }
17203
17204 // SSE scalar comparison intrinsics
17205 case X86::BI__builtin_ia32_cmpeqss:
17206 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17207 case X86::BI__builtin_ia32_cmpltss:
17208 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17209 case X86::BI__builtin_ia32_cmpless:
17210 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17211 case X86::BI__builtin_ia32_cmpunordss:
17212 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17213 case X86::BI__builtin_ia32_cmpneqss:
17214 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17215 case X86::BI__builtin_ia32_cmpnltss:
17216 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17217 case X86::BI__builtin_ia32_cmpnless:
17218 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17219 case X86::BI__builtin_ia32_cmpordss:
17220 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17221 case X86::BI__builtin_ia32_cmpeqsd:
17222 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17223 case X86::BI__builtin_ia32_cmpltsd:
17224 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17225 case X86::BI__builtin_ia32_cmplesd:
17226 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17227 case X86::BI__builtin_ia32_cmpunordsd:
17228 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17229 case X86::BI__builtin_ia32_cmpneqsd:
17230 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17231 case X86::BI__builtin_ia32_cmpnltsd:
17232 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17233 case X86::BI__builtin_ia32_cmpnlesd:
17234 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17235 case X86::BI__builtin_ia32_cmpordsd:
17236 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17237
17238 // f16c half2float intrinsics
17239 case X86::BI__builtin_ia32_vcvtph2ps:
17240 case X86::BI__builtin_ia32_vcvtph2ps256:
17241 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17242 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17243 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17244 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17245 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17246 }
17247
17248 // AVX512 bf16 intrinsics
17249 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17250 Ops[2] = getMaskVecValue(
17251 *this, Ops[2],
17252 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17253 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17254 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17255 }
17256 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17257 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17258
17259 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17260 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17261 Intrinsic::ID IID;
17262 switch (BuiltinID) {
17263 default: llvm_unreachable("Unsupported intrinsic!");
17264 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17265 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17266 break;
17267 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17268 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17269 break;
17270 }
17271 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17272 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17273 }
17274
17275 case X86::BI__cpuid:
17276 case X86::BI__cpuidex: {
17277 Value *FuncId = EmitScalarExpr(E->getArg(1));
17278 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17279 ? EmitScalarExpr(E->getArg(2))
17280 : llvm::ConstantInt::get(Int32Ty, 0);
17281
17282 llvm::StructType *CpuidRetTy =
17283 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17284 llvm::FunctionType *FTy =
17285 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17286
17287 StringRef Asm, Constraints;
17288 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17289 Asm = "cpuid";
17290 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17291 } else {
17292 // x86-64 uses %rbx as the base register, so preserve it.
17293 Asm = "xchgq %rbx, ${1:q}\n"
17294 "cpuid\n"
17295 "xchgq %rbx, ${1:q}";
17296 Constraints = "={ax},=r,={cx},={dx},0,2";
17297 }
17298
17299 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17300 /*hasSideEffects=*/false);
17301 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17302 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17303 Value *Store = nullptr;
17304 for (unsigned i = 0; i < 4; i++) {
17305 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17306 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17307 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17308 }
17309
17310 // Return the last store instruction to signal that we have emitted
17311 // the intrinsic.
17312 return Store;
17313 }
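// Usage sketch (caller-side code): the MSVC-style intrinsics take the
// output array first,
//   int info[4];
//   __cpuid(info, 1);       // leaf 1: EAX..EDX land in info[0..3]
//   __cpuidex(info, 7, 0);  // leaf 7, subleaf 0
// which is why the four extracted asm results are stored to consecutive
// i32 slots of arg 0 above.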
17314
17315 case X86::BI__emul:
17316 case X86::BI__emulu: {
17317 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17318 bool isSigned = (BuiltinID == X86::BI__emul);
17319 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17320 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17321 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17322 }
17323 case X86::BI__mulh:
17324 case X86::BI__umulh:
17325 case X86::BI_mul128:
17326 case X86::BI_umul128: {
17327 llvm::Type *ResType = ConvertType(E->getType());
17328 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17329
17330 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17331 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17332 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17333
17334 Value *MulResult, *HigherBits;
17335 if (IsSigned) {
17336 MulResult = Builder.CreateNSWMul(LHS, RHS);
17337 HigherBits = Builder.CreateAShr(MulResult, 64);
17338 } else {
17339 MulResult = Builder.CreateNUWMul(LHS, RHS);
17340 HigherBits = Builder.CreateLShr(MulResult, 64);
17341 }
17342 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17343
17344 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17345 return HigherBits;
17346
17347 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17348 Builder.CreateStore(HigherBits, HighBitsAddress);
17349 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17350 }
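// Worked example for the widening path (operand values hypothetical):
// __umulh(1ULL << 32, 1ULL << 32) forms the 128-bit product 2^64 and
// returns its high 64 bits, i.e. 1; _umul128 with the same operands
// instead returns the low 64 bits (0) and stores the high bits (1)
// through its out-pointer.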
17351
17352 case X86::BI__faststorefence: {
17353 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17354 llvm::SyncScope::System);
17355 }
17356 case X86::BI__shiftleft128:
17357 case X86::BI__shiftright128: {
17358 llvm::Function *F = CGM.getIntrinsic(
17359 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17360 Int64Ty);
17361 // Flip low/high ops and zero-extend amount to matching type.
17362 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17363 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17364 std::swap(Ops[0], Ops[1]);
17365 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17366 return Builder.CreateCall(F, Ops);
17367 }
17368 case X86::BI_ReadWriteBarrier:
17369 case X86::BI_ReadBarrier:
17370 case X86::BI_WriteBarrier: {
17371 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17372 llvm::SyncScope::SingleThread);
17373 }
17374
17375 case X86::BI_AddressOfReturnAddress: {
17376 Function *F =
17377 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17378 return Builder.CreateCall(F);
17379 }
17380 case X86::BI__stosb: {
17381 // We treat __stosb as a volatile memset - it may not generate "rep stosb"
17382 // instruction, but it will create a memset that won't be optimized away.
17383 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17384 }
17385 // Corresponding to intrinsics that return 2 tiles (tile0_tile1).
17386 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17387 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17388 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17389 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17390 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17391 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17392 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17393 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17394 Intrinsic::ID IID;
17395 switch (BuiltinID) {
17396 default:
17397 llvm_unreachable("Unsupported intrinsic!");
17398 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17399 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17400 break;
17401 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17402 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17403 break;
17404 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17405 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17406 break;
17407 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17408 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17409 break;
17410 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17411 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17412 break;
17413 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17414 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17415 break;
17416 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17417 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17418 break;
17419 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17420 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17421 break;
17422 }
17423
17424 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17425 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17426 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17427
17428 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17429 assert(PtrTy && "arg3 must be of pointer type");
17430 QualType PtreeTy = PtrTy->getPointeeType();
17431 llvm::Type *TyPtee = ConvertType(PtreeTy);
17432
17433 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17434 // Then store tile0 into DstPtr0
17435 Value *T0 = Builder.CreateExtractValue(Call, 0);
17436 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17437 {TyPtee}, {T0});
17438 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17439
17440 // Then store tile1 into DstPtr1
17441 Value *T1 = Builder.CreateExtractValue(Call, 1);
17442 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17443 {TyPtee}, {T1});
17444 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17445
17446 // Note: we deliberately avoid using x86_tilestored64_internal to store
17447 // the results here, because it cannot guarantee the scope of the memory
17448 // writes. That could force shape reloads after the first AMX intrinsic,
17449 // which the current AMX register allocation is unable to handle.
17450
17451 return Store;
17452 }
17453 case X86::BI__ud2:
17454 // llvm.trap makes a ud2a instruction on x86.
17455 return EmitTrapCall(Intrinsic::trap);
17456 case X86::BI__int2c: {
17457 // This syscall signals a driver assertion failure in x86 NT kernels.
17458 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17459 llvm::InlineAsm *IA =
17460 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17461 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17462 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17463 llvm::Attribute::NoReturn);
17464 llvm::CallInst *CI = Builder.CreateCall(IA);
17465 CI->setAttributes(NoReturnAttr);
17466 return CI;
17467 }
17468 case X86::BI__readfsbyte:
17469 case X86::BI__readfsword:
17470 case X86::BI__readfsdword:
17471 case X86::BI__readfsqword: {
17472 llvm::Type *IntTy = ConvertType(E->getType());
17473 Value *Ptr = Builder.CreateIntToPtr(
17474 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17475 LoadInst *Load = Builder.CreateAlignedLoad(
17476 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17477 Load->setVolatile(true);
17478 return Load;
17479 }
17480 case X86::BI__readgsbyte:
17481 case X86::BI__readgsword:
17482 case X86::BI__readgsdword:
17483 case X86::BI__readgsqword: {
17484 llvm::Type *IntTy = ConvertType(E->getType());
17485 Value *Ptr = Builder.CreateIntToPtr(
17486 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17487 LoadInst *Load = Builder.CreateAlignedLoad(
17488 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17489 Load->setVolatile(true);
17490 return Load;
17491 }
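// The numbered address spaces follow the x86 backend convention for
// segment-relative memory (assumed here, not spelled out in this file):
// 256 is GS and 257 is FS. Usage sketch with an illustrative offset:
//   unsigned long long v = __readgsqword(0x30);
// which becomes a volatile i64 load from addrspace(256).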
17492 case X86::BI__builtin_ia32_encodekey128_u32: {
17493 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17494
17495 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17496
17497 for (int i = 0; i < 3; ++i) {
17498 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17499 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17500 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17501 }
17502
17503 return Builder.CreateExtractValue(Call, 0);
17504 }
17505 case X86::BI__builtin_ia32_encodekey256_u32: {
17506 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17507
17508 Value *Call =
17509 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17510
17511 for (int i = 0; i < 4; ++i) {
17512 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17513 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17514 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17515 }
17516
17517 return Builder.CreateExtractValue(Call, 0);
17518 }
17519 case X86::BI__builtin_ia32_aesenc128kl_u8:
17520 case X86::BI__builtin_ia32_aesdec128kl_u8:
17521 case X86::BI__builtin_ia32_aesenc256kl_u8:
17522 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17523 Intrinsic::ID IID;
17524 StringRef BlockName;
17525 switch (BuiltinID) {
17526 default:
17527 llvm_unreachable("Unexpected builtin");
17528 case X86::BI__builtin_ia32_aesenc128kl_u8:
17529 IID = Intrinsic::x86_aesenc128kl;
17530 BlockName = "aesenc128kl";
17531 break;
17532 case X86::BI__builtin_ia32_aesdec128kl_u8:
17533 IID = Intrinsic::x86_aesdec128kl;
17534 BlockName = "aesdec128kl";
17535 break;
17536 case X86::BI__builtin_ia32_aesenc256kl_u8:
17537 IID = Intrinsic::x86_aesenc256kl;
17538 BlockName = "aesenc256kl";
17539 break;
17540 case X86::BI__builtin_ia32_aesdec256kl_u8:
17541 IID = Intrinsic::x86_aesdec256kl;
17542 BlockName = "aesdec256kl";
17543 break;
17544 }
17545
17546 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17547
17548 BasicBlock *NoError =
17549 createBasicBlock(BlockName + "_no_error", this->CurFn);
17550 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17551 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17552
17553 Value *Ret = Builder.CreateExtractValue(Call, 0);
17554 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17555 Value *Out = Builder.CreateExtractValue(Call, 1);
17556 Builder.CreateCondBr(Succ, NoError, Error);
17557
17558 Builder.SetInsertPoint(NoError);
17559 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17560 Builder.CreateBr(End);
17561
17562 Builder.SetInsertPoint(Error);
17563 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17564 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17565 Builder.CreateBr(End);
17566
17567 Builder.SetInsertPoint(End);
17568 return Builder.CreateExtractValue(Call, 0);
17569 }
17570 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17571 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17572 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17573 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17574 Intrinsic::ID IID;
17575 StringRef BlockName;
17576 switch (BuiltinID) {
17577 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17578 IID = Intrinsic::x86_aesencwide128kl;
17579 BlockName = "aesencwide128kl";
17580 break;
17581 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17582 IID = Intrinsic::x86_aesdecwide128kl;
17583 BlockName = "aesdecwide128kl";
17584 break;
17585 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17586 IID = Intrinsic::x86_aesencwide256kl;
17587 BlockName = "aesencwide256kl";
17588 break;
17589 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17590 IID = Intrinsic::x86_aesdecwide256kl;
17591 BlockName = "aesdecwide256kl";
17592 break;
17593 }
17594
17595 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17596 Value *InOps[9];
17597 InOps[0] = Ops[2];
17598 for (int i = 0; i != 8; ++i) {
17599 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17600 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17601 }
17602
17603 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17604
17605 BasicBlock *NoError =
17606 createBasicBlock(BlockName + "_no_error", this->CurFn);
17607 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17608 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17609
17610 Value *Ret = Builder.CreateExtractValue(Call, 0);
17611 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17612 Builder.CreateCondBr(Succ, NoError, Error);
17613
17614 Builder.SetInsertPoint(NoError);
17615 for (int i = 0; i != 8; ++i) {
17616 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17617 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17618 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17619 }
17620 Builder.CreateBr(End);
17621
17622 Builder.SetInsertPoint(Error);
17623 for (int i = 0; i != 8; ++i) {
17624 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17625 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17626 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17627 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17628 }
17629 Builder.CreateBr(End);
17630
17631 Builder.SetInsertPoint(End);
17632 return Builder.CreateExtractValue(Call, 0);
17633 }
17634 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17635 IsConjFMA = true;
17636 [[fallthrough]];
17637 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17638 Intrinsic::ID IID = IsConjFMA
17639 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17640 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17641 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17642 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17643 }
17644 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17645 IsConjFMA = true;
17646 [[fallthrough]];
17647 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17648 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17649 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17650 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17651 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17652 }
17653 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17654 IsConjFMA = true;
17655 [[fallthrough]];
17656 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17657 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17658 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17659 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17660 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17661 return EmitX86Select(*this, And, Call, Ops[0]);
17662 }
17663 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17664 IsConjFMA = true;
17665 [[fallthrough]];
17666 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17667 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17668 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17669 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17670 static constexpr int Mask[] = {0, 5, 6, 7};
17671 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17672 }
17673 case X86::BI__builtin_ia32_prefetchi:
17674 return Builder.CreateCall(
17675 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17676 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17677 llvm::ConstantInt::get(Int32Ty, 0)});
17678 }
17679}
17680
17681Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17682 const CallExpr *E) {
17683 // Do not emit the builtin arguments in the arguments of a function call,
17684 // because the evaluation order of function arguments is not specified in C++.
17685 // This is important when testing to ensure the arguments are emitted in the
17686 // same order every time. Eg:
17687 // Instead of:
17688 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17689 // EmitScalarExpr(E->getArg(1)), "swdiv");
17690 // Use:
17691 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17692 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17693 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
17694
17695 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17696
17697#include "llvm/TargetParser/PPCTargetParser.def"
17698 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17699 unsigned Mask, CmpInst::Predicate CompOp,
17700 unsigned OpValue) -> Value * {
17701 if (SupportMethod == BUILTIN_PPC_FALSE)
17702 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17703
17704 if (SupportMethod == BUILTIN_PPC_TRUE)
17705 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17706
17707 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17708
17709 llvm::Value *FieldValue = nullptr;
17710 if (SupportMethod == USE_SYS_CONF) {
17711 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17712 llvm::Constant *SysConf =
17713 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17714
17715 // Grab the appropriate field from _system_configuration.
17716 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17717 ConstantInt::get(Int32Ty, FieldIdx)};
17718
17719 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17720 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17721 CharUnits::fromQuantity(4));
17722 } else if (SupportMethod == SYS_CALL) {
17723 llvm::FunctionType *FTy =
17724 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17725 llvm::FunctionCallee Func =
17726 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17727
17728 FieldValue =
17729 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17730 }
17731 assert(FieldValue &&
17732 "SupportMethod value is not defined in PPCTargetParser.def.");
17733
17734 if (Mask)
17735 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17736
17737 llvm::Type *ValueType = FieldValue->getType();
17738 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17739 assert(
17740 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17741 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17742
17743 return Builder.CreateICmp(
17744 CompOp, FieldValue,
17745 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17746 };
17747
17748 switch (BuiltinID) {
17749 default: return nullptr;
17750
17751 case Builtin::BI__builtin_cpu_is: {
17752 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17753 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17754 llvm::Triple Triple = getTarget().getTriple();
17755
17756 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17757 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17758
17759 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17760 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17761#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17762 AIXID) \
17763 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17764#include "llvm/TargetParser/PPCTargetParser.def"
17765 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17766 BUILTIN_PPC_UNSUPPORTED, 0}));
17767
17768 if (Triple.isOSAIX()) {
17769 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17770 "Invalid CPU name. Missed by SemaChecking?");
17771 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17772 ICmpInst::ICMP_EQ, AIXIDValue);
17773 }
17774
17775 assert(Triple.isOSLinux() &&
17776 "__builtin_cpu_is() is only supported for AIX and Linux.");
17777
17778 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17779 "Invalid CPU name. Missed by SemaChecking?");
17780
17781 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17782 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17783
17784 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17785 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17786 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17787 return Builder.CreateICmpEQ(TheCall,
17788 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17789 }
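  // Usage sketch (CPU names and ID values come from PPCTargetParser.def; the
  // constants shown are illustrative):
  //   if (__builtin_cpu_is("power9")) { ... }
  // On Linux this lowers to roughly:
  //   %w = call i32 @llvm.ppc.fixed.addr.ld(i32 PPC_FAWORD_CPUID)
  //   %r = icmp eq i32 %w, <LinuxIDValue>
  // while on AIX the comparison is built from _system_configuration via
  // GenAIXPPCBuiltinCpuExpr above.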
17790 case Builtin::BI__builtin_cpu_supports: {
17791 llvm::Triple Triple = getTarget().getTriple();
17792 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17793 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17794 if (Triple.isOSAIX()) {
17795 unsigned SupportMethod, FieldIdx, Mask, Value;
17796 CmpInst::Predicate CompOp;
17797 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17798 unsigned>
17799 CPUSupportType;
17800 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17801 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17802#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17803 VALUE) \
17804 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17805#include "llvm/TargetParser/PPCTargetParser.def"
17806 .Default({BUILTIN_PPC_FALSE, 0, 0,
17807 CmpInst::Predicate(), 0}));
17808 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17809 Value);
17810 }
17811
17812 assert(Triple.isOSLinux() &&
17813 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17814 unsigned FeatureWord;
17815 unsigned BitMask;
17816 std::tie(FeatureWord, BitMask) =
17817 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17818#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17819 .Case(Name, {FA_WORD, Bitmask})
17820#include "llvm/TargetParser/PPCTargetParser.def"
17821 .Default({0, 0});
17822 if (!BitMask)
17823 return Builder.getFalse();
17824 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17825 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17826 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17827 Value *Mask =
17828 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17829 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17830#undef PPC_FAWORD_HWCAP
17831#undef PPC_FAWORD_HWCAP2
17832#undef PPC_FAWORD_CPUID
17833 }
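  // Usage sketch (feature names and their word/bit assignments come from
  // PPCTargetParser.def; the constants shown are illustrative):
  //   if (__builtin_cpu_supports("vsx")) { ... }
  // On Linux this lowers to roughly:
  //   %w = call i32 @llvm.ppc.fixed.addr.ld(i32 <FeatureWord>)
  //   %m = and i32 %w, <BitMask>
  //   %r = icmp ne i32 %m, 0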
17834
17835 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17836 // call __builtin_readcyclecounter.
17837 case PPC::BI__builtin_ppc_get_timebase:
17838 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17839
17840 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17841 case PPC::BI__builtin_altivec_lvx:
17842 case PPC::BI__builtin_altivec_lvxl:
17843 case PPC::BI__builtin_altivec_lvebx:
17844 case PPC::BI__builtin_altivec_lvehx:
17845 case PPC::BI__builtin_altivec_lvewx:
17846 case PPC::BI__builtin_altivec_lvsl:
17847 case PPC::BI__builtin_altivec_lvsr:
17848 case PPC::BI__builtin_vsx_lxvd2x:
17849 case PPC::BI__builtin_vsx_lxvw4x:
17850 case PPC::BI__builtin_vsx_lxvd2x_be:
17851 case PPC::BI__builtin_vsx_lxvw4x_be:
17852 case PPC::BI__builtin_vsx_lxvl:
17853 case PPC::BI__builtin_vsx_lxvll:
17854 {
17855    SmallVector<Value *, 2> Ops;
17856    Ops.push_back(EmitScalarExpr(E->getArg(0)));
17857 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17858 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17859 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17860 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17861 Ops.pop_back();
17862 }
17863
17864 switch (BuiltinID) {
17865 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17866 case PPC::BI__builtin_altivec_lvx:
17867 ID = Intrinsic::ppc_altivec_lvx;
17868 break;
17869 case PPC::BI__builtin_altivec_lvxl:
17870 ID = Intrinsic::ppc_altivec_lvxl;
17871 break;
17872 case PPC::BI__builtin_altivec_lvebx:
17873 ID = Intrinsic::ppc_altivec_lvebx;
17874 break;
17875 case PPC::BI__builtin_altivec_lvehx:
17876 ID = Intrinsic::ppc_altivec_lvehx;
17877 break;
17878 case PPC::BI__builtin_altivec_lvewx:
17879 ID = Intrinsic::ppc_altivec_lvewx;
17880 break;
17881 case PPC::BI__builtin_altivec_lvsl:
17882 ID = Intrinsic::ppc_altivec_lvsl;
17883 break;
17884 case PPC::BI__builtin_altivec_lvsr:
17885 ID = Intrinsic::ppc_altivec_lvsr;
17886 break;
17887 case PPC::BI__builtin_vsx_lxvd2x:
17888 ID = Intrinsic::ppc_vsx_lxvd2x;
17889 break;
17890 case PPC::BI__builtin_vsx_lxvw4x:
17891 ID = Intrinsic::ppc_vsx_lxvw4x;
17892 break;
17893 case PPC::BI__builtin_vsx_lxvd2x_be:
17894 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17895 break;
17896 case PPC::BI__builtin_vsx_lxvw4x_be:
17897 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17898 break;
17899 case PPC::BI__builtin_vsx_lxvl:
17900 ID = Intrinsic::ppc_vsx_lxvl;
17901 break;
17902 case PPC::BI__builtin_vsx_lxvll:
17903 ID = Intrinsic::ppc_vsx_lxvll;
17904 break;
17905 }
17906 llvm::Function *F = CGM.getIntrinsic(ID);
17907 return Builder.CreateCall(F, Ops, "");
17908 }
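  // For the non-length forms the offset operand is folded into the address
  // before the call, so e.g. vec_ld(off, p) becomes roughly (a sketch):
  //   %addr = getelementptr i8, ptr %p, <off>
  //   %v    = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %addr)
  // The lxvl/lxvll forms keep the length operand and pass both arguments
  // through to the intrinsic unchanged.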
17909
17910 // vec_st, vec_xst_be
17911 case PPC::BI__builtin_altivec_stvx:
17912 case PPC::BI__builtin_altivec_stvxl:
17913 case PPC::BI__builtin_altivec_stvebx:
17914 case PPC::BI__builtin_altivec_stvehx:
17915 case PPC::BI__builtin_altivec_stvewx:
17916 case PPC::BI__builtin_vsx_stxvd2x:
17917 case PPC::BI__builtin_vsx_stxvw4x:
17918 case PPC::BI__builtin_vsx_stxvd2x_be:
17919 case PPC::BI__builtin_vsx_stxvw4x_be:
17920 case PPC::BI__builtin_vsx_stxvl:
17921 case PPC::BI__builtin_vsx_stxvll:
17922 {
17923    SmallVector<Value *, 3> Ops;
17924    Ops.push_back(EmitScalarExpr(E->getArg(0)));
17925 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17926 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17927 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17928 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17929 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17930 Ops.pop_back();
17931 }
17932
17933 switch (BuiltinID) {
17934 default: llvm_unreachable("Unsupported st intrinsic!");
17935 case PPC::BI__builtin_altivec_stvx:
17936 ID = Intrinsic::ppc_altivec_stvx;
17937 break;
17938 case PPC::BI__builtin_altivec_stvxl:
17939 ID = Intrinsic::ppc_altivec_stvxl;
17940 break;
17941 case PPC::BI__builtin_altivec_stvebx:
17942 ID = Intrinsic::ppc_altivec_stvebx;
17943 break;
17944 case PPC::BI__builtin_altivec_stvehx:
17945 ID = Intrinsic::ppc_altivec_stvehx;
17946 break;
17947 case PPC::BI__builtin_altivec_stvewx:
17948 ID = Intrinsic::ppc_altivec_stvewx;
17949 break;
17950 case PPC::BI__builtin_vsx_stxvd2x:
17951 ID = Intrinsic::ppc_vsx_stxvd2x;
17952 break;
17953 case PPC::BI__builtin_vsx_stxvw4x:
17954 ID = Intrinsic::ppc_vsx_stxvw4x;
17955 break;
17956 case PPC::BI__builtin_vsx_stxvd2x_be:
17957 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17958 break;
17959 case PPC::BI__builtin_vsx_stxvw4x_be:
17960 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17961 break;
17962 case PPC::BI__builtin_vsx_stxvl:
17963 ID = Intrinsic::ppc_vsx_stxvl;
17964 break;
17965 case PPC::BI__builtin_vsx_stxvll:
17966 ID = Intrinsic::ppc_vsx_stxvll;
17967 break;
17968 }
17969 llvm::Function *F = CGM.getIntrinsic(ID);
17970 return Builder.CreateCall(F, Ops, "");
17971 }
17972 case PPC::BI__builtin_vsx_ldrmb: {
17973 // Essentially boils down to performing an unaligned VMX load sequence so
17974 // as to avoid crossing a page boundary and then shuffling the elements
17975 // into the right side of the vector register.
17976 Value *Op0 = EmitScalarExpr(E->getArg(0));
17977 Value *Op1 = EmitScalarExpr(E->getArg(1));
17978 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17979 llvm::Type *ResTy = ConvertType(E->getType());
17980 bool IsLE = getTarget().isLittleEndian();
17981
17982 // If the user wants the entire vector, just load the entire vector.
17983 if (NumBytes == 16) {
17984      Value *LD =
17985          Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17986 if (!IsLE)
17987 return LD;
17988
17989 // Reverse the bytes on LE.
17990 SmallVector<int, 16> RevMask;
17991 for (int Idx = 0; Idx < 16; Idx++)
17992 RevMask.push_back(15 - Idx);
17993 return Builder.CreateShuffleVector(LD, LD, RevMask);
17994 }
17995
17996 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17997 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17998 : Intrinsic::ppc_altivec_lvsl);
17999 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
18000 Value *HiMem = Builder.CreateGEP(
18001 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
18002 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
18003 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
18004 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
18005
18006 Op0 = IsLE ? HiLd : LoLd;
18007 Op1 = IsLE ? LoLd : HiLd;
18008 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
18009 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
18010
18011 if (IsLE) {
18012 SmallVector<int, 16> Consts;
18013 for (int Idx = 0; Idx < 16; Idx++) {
18014 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
18015 : 16 - (NumBytes - Idx);
18016 Consts.push_back(Val);
18017 }
18018 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
18019 Zero, Consts);
18020 }
18021    SmallVector<Constant *, 16> Consts;
18022    for (int Idx = 0; Idx < 16; Idx++)
18023 Consts.push_back(Builder.getInt8(NumBytes + Idx));
18024 Value *Mask2 = ConstantVector::get(Consts);
18025 return Builder.CreateBitCast(
18026 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
18027 }
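  // Worked sketch of the short-load path (NumBytes < 16), e.g.
  // __builtin_vsx_ldrmb(p, 7) on big-endian:
  //   %lo  = lvx(p)               ; aligned block containing the first byte
  //   %hi  = lvx(p + 6)           ; aligned block containing the last byte
  //   %m   = lvsl(p)              ; (lvsr is used on little-endian)
  //   %all = vperm(%lo, %hi, %m)  ; the 7 requested bytes, made contiguous
  // followed by one more shuffle against an all-zero vector so the live bytes
  // end up right-justified in the result and the remaining lanes read as zero.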
18028 case PPC::BI__builtin_vsx_strmb: {
18029 Value *Op0 = EmitScalarExpr(E->getArg(0));
18030 Value *Op1 = EmitScalarExpr(E->getArg(1));
18031 Value *Op2 = EmitScalarExpr(E->getArg(2));
18032 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
18033 bool IsLE = getTarget().isLittleEndian();
18034 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
18035      // When storing the whole vector, store it directly on BE; on LE, reverse
18036      // the bytes first and then store.
18037 if (Width == 16) {
18038 Value *StVec = Op2;
18039 if (IsLE) {
18040 SmallVector<int, 16> RevMask;
18041 for (int Idx = 0; Idx < 16; Idx++)
18042 RevMask.push_back(15 - Idx);
18043 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
18044 }
18045 return Builder.CreateStore(
18046 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
18047 }
18048 auto *ConvTy = Int64Ty;
18049 unsigned NumElts = 0;
18050 switch (Width) {
18051 default:
18052 llvm_unreachable("width for stores must be a power of 2");
18053 case 8:
18054 ConvTy = Int64Ty;
18055 NumElts = 2;
18056 break;
18057 case 4:
18058 ConvTy = Int32Ty;
18059 NumElts = 4;
18060 break;
18061 case 2:
18062 ConvTy = Int16Ty;
18063 NumElts = 8;
18064 break;
18065 case 1:
18066 ConvTy = Int8Ty;
18067 NumElts = 16;
18068 break;
18069 }
18070 Value *Vec = Builder.CreateBitCast(
18071 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
18072 Value *Ptr =
18073 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
18074 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
18075 if (IsLE && Width > 1) {
18076 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
18077 Elt = Builder.CreateCall(F, Elt);
18078 }
18079 return Builder.CreateStore(
18080 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
18081 };
18082 unsigned Stored = 0;
18083 unsigned RemainingBytes = NumBytes;
18084 Value *Result;
18085 if (NumBytes == 16)
18086 return StoreSubVec(16, 0, 0);
18087 if (NumBytes >= 8) {
18088 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
18089 RemainingBytes -= 8;
18090 Stored += 8;
18091 }
18092 if (RemainingBytes >= 4) {
18093 Result = StoreSubVec(4, NumBytes - Stored - 4,
18094 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
18095 RemainingBytes -= 4;
18096 Stored += 4;
18097 }
18098 if (RemainingBytes >= 2) {
18099 Result = StoreSubVec(2, NumBytes - Stored - 2,
18100 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
18101 RemainingBytes -= 2;
18102 Stored += 2;
18103 }
18104 if (RemainingBytes)
18105 Result =
18106 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18107 return Result;
18108 }
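  // Worked example of the decomposition above: NumBytes == 11 emits
  //   StoreSubVec(8, /*Offset=*/3, ...)   ; bytes 3..10
  //   StoreSubVec(2, /*Offset=*/1, ...)   ; bytes 1..2
  //   StoreSubVec(1, /*Offset=*/0, ...)   ; byte 0
  // where the element number selects the piece from the LE or BE end of the
  // source vector and multi-byte pieces are byte-swapped on little-endian.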
18109 // Square root
18110 case PPC::BI__builtin_vsx_xvsqrtsp:
18111 case PPC::BI__builtin_vsx_xvsqrtdp: {
18112 llvm::Type *ResultType = ConvertType(E->getType());
18113 Value *X = EmitScalarExpr(E->getArg(0));
18114 if (Builder.getIsFPConstrained()) {
18115 llvm::Function *F = CGM.getIntrinsic(
18116 Intrinsic::experimental_constrained_sqrt, ResultType);
18117 return Builder.CreateConstrainedFPCall(F, X);
18118 } else {
18119 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18120 return Builder.CreateCall(F, X);
18121 }
18122 }
18123 // Count leading zeros
18124 case PPC::BI__builtin_altivec_vclzb:
18125 case PPC::BI__builtin_altivec_vclzh:
18126 case PPC::BI__builtin_altivec_vclzw:
18127 case PPC::BI__builtin_altivec_vclzd: {
18128 llvm::Type *ResultType = ConvertType(E->getType());
18129 Value *X = EmitScalarExpr(E->getArg(0));
18130 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18131 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18132 return Builder.CreateCall(F, {X, Undef});
18133 }
18134 case PPC::BI__builtin_altivec_vctzb:
18135 case PPC::BI__builtin_altivec_vctzh:
18136 case PPC::BI__builtin_altivec_vctzw:
18137 case PPC::BI__builtin_altivec_vctzd: {
18138 llvm::Type *ResultType = ConvertType(E->getType());
18139 Value *X = EmitScalarExpr(E->getArg(0));
18140 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18141 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18142 return Builder.CreateCall(F, {X, Undef});
18143 }
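  // Both expansions pass 'false' as the second operand, i.e. the intrinsic is
  // fully defined for a zero input (it returns the element width) rather than
  // producing poison, matching the vclz/vctz semantics. Sketch:
  //   %r = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)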
18144 case PPC::BI__builtin_altivec_vinsd:
18145 case PPC::BI__builtin_altivec_vinsw:
18146 case PPC::BI__builtin_altivec_vinsd_elt:
18147 case PPC::BI__builtin_altivec_vinsw_elt: {
18148 llvm::Type *ResultType = ConvertType(E->getType());
18149 Value *Op0 = EmitScalarExpr(E->getArg(0));
18150 Value *Op1 = EmitScalarExpr(E->getArg(1));
18151 Value *Op2 = EmitScalarExpr(E->getArg(2));
18152
18153 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18154 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18155
18156 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18157 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18158
18159 // The third argument must be a compile time constant.
18160 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18161 assert(ArgCI &&
18162 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18163
18164 // Valid value for the third argument is dependent on the input type and
18165 // builtin called.
18166 int ValidMaxValue = 0;
18167 if (IsUnaligned)
18168 ValidMaxValue = (Is32bit) ? 12 : 8;
18169 else
18170 ValidMaxValue = (Is32bit) ? 3 : 1;
18171
18172 // Get value of third argument.
18173 int64_t ConstArg = ArgCI->getSExtValue();
18174
18175 // Compose range checking error message.
18176 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18177 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18178 RangeErrMsg += " is outside of the valid range [0, ";
18179 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18180
18181 // Issue error if third argument is not within the valid range.
18182 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18183 CGM.Error(E->getExprLoc(), RangeErrMsg);
18184
18185 // Input to vec_replace_elt is an element index, convert to byte index.
18186 if (!IsUnaligned) {
18187 ConstArg *= Is32bit ? 4 : 8;
18188      // Fix the constant according to endianness.
18189 if (getTarget().isLittleEndian())
18190 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18191 }
18192
18193 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18194 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18195 // Casting input to vector int as per intrinsic definition.
18196 Op0 =
18197 Is32bit
18198 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18199 : Builder.CreateBitCast(Op0,
18200 llvm::FixedVectorType::get(Int64Ty, 2));
18201 return Builder.CreateBitCast(
18202 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18203 }
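  // Worked example of the index conversion above: on little-endian,
  // vec_replace_elt on a vector of 32-bit elements with element index 1
  // first becomes byte index 1 * 4 = 4 and is then flipped to 12 - 4 = 8
  // before being passed to llvm.ppc.altivec.vinsw. The unaligned variants
  // (__builtin_altivec_vinsw/vinsd) take a byte index directly and skip
  // both steps.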
18204 case PPC::BI__builtin_altivec_vadduqm:
18205 case PPC::BI__builtin_altivec_vsubuqm: {
18206 Value *Op0 = EmitScalarExpr(E->getArg(0));
18207 Value *Op1 = EmitScalarExpr(E->getArg(1));
18208 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18209 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18210 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18211 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18212 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18213 else
18214 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18215 }
18216 case PPC::BI__builtin_altivec_vaddcuq_c:
18217 case PPC::BI__builtin_altivec_vsubcuq_c: {
18218    SmallVector<Value *, 2> Ops;
18219    Value *Op0 = EmitScalarExpr(E->getArg(0));
18220 Value *Op1 = EmitScalarExpr(E->getArg(1));
18221 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18222 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18223 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18224 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18225 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18226 ? Intrinsic::ppc_altivec_vaddcuq
18227 : Intrinsic::ppc_altivec_vsubcuq;
18228 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18229 }
18230 case PPC::BI__builtin_altivec_vaddeuqm_c:
18231 case PPC::BI__builtin_altivec_vaddecuq_c:
18232 case PPC::BI__builtin_altivec_vsubeuqm_c:
18233 case PPC::BI__builtin_altivec_vsubecuq_c: {
18234    SmallVector<Value *, 3> Ops;
18235    Value *Op0 = EmitScalarExpr(E->getArg(0));
18236 Value *Op1 = EmitScalarExpr(E->getArg(1));
18237 Value *Op2 = EmitScalarExpr(E->getArg(2));
18238 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18239 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18240 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18241 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18242 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18243 switch (BuiltinID) {
18244 default:
18245 llvm_unreachable("Unsupported intrinsic!");
18246 case PPC::BI__builtin_altivec_vaddeuqm_c:
18247 ID = Intrinsic::ppc_altivec_vaddeuqm;
18248 break;
18249 case PPC::BI__builtin_altivec_vaddecuq_c:
18250 ID = Intrinsic::ppc_altivec_vaddecuq;
18251 break;
18252 case PPC::BI__builtin_altivec_vsubeuqm_c:
18253 ID = Intrinsic::ppc_altivec_vsubeuqm;
18254 break;
18255 case PPC::BI__builtin_altivec_vsubecuq_c:
18256 ID = Intrinsic::ppc_altivec_vsubecuq;
18257 break;
18258 }
18259 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18260 }
18261 case PPC::BI__builtin_ppc_rldimi:
18262 case PPC::BI__builtin_ppc_rlwimi: {
18263 Value *Op0 = EmitScalarExpr(E->getArg(0));
18264 Value *Op1 = EmitScalarExpr(E->getArg(1));
18265 Value *Op2 = EmitScalarExpr(E->getArg(2));
18266 Value *Op3 = EmitScalarExpr(E->getArg(3));
18267    // rldimi is a 64-bit instruction; on 32-bit targets, expand the builtin
18268    // before isel to leverage peepholes and avoid legalization effort.
18269 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18270 !getTarget().getTriple().isPPC64()) {
18271 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18272 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18273 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18274 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18275 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18276 }
18277 return Builder.CreateCall(
18278 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18279 ? Intrinsic::ppc_rldimi
18280 : Intrinsic::ppc_rlwimi),
18281 {Op0, Op1, Op2, Op3});
18282 }
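  // Sketch of the 32-bit expansion of __builtin_ppc_rldimi used above:
  //   %rot = call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 zext(%shift))
  //   %res = or i64 (and i64 %rot, %mask), (and i64 %b, xor(%mask, -1))
  // i.e. rotate the first operand and merge it with the second under the
  // mask, letting later peepholes pick the final instruction sequence.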
18283 case PPC::BI__builtin_ppc_rlwnm: {
18284 Value *Op0 = EmitScalarExpr(E->getArg(0));
18285 Value *Op1 = EmitScalarExpr(E->getArg(1));
18286 Value *Op2 = EmitScalarExpr(E->getArg(2));
18287 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18288 {Op0, Op1, Op2});
18289 }
18290 case PPC::BI__builtin_ppc_poppar4:
18291 case PPC::BI__builtin_ppc_poppar8: {
18292 Value *Op0 = EmitScalarExpr(E->getArg(0));
18293 llvm::Type *ArgType = Op0->getType();
18294 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18295 Value *Tmp = Builder.CreateCall(F, Op0);
18296
18297 llvm::Type *ResultType = ConvertType(E->getType());
18298 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18299 if (Result->getType() != ResultType)
18300 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18301 "cast");
18302 return Result;
18303 }
18304 case PPC::BI__builtin_ppc_cmpb: {
18305 Value *Op0 = EmitScalarExpr(E->getArg(0));
18306 Value *Op1 = EmitScalarExpr(E->getArg(1));
18307 if (getTarget().getTriple().isPPC64()) {
18308 Function *F =
18309 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18310 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18311 }
18312 // For 32 bit, emit the code as below:
18313 // %conv = trunc i64 %a to i32
18314 // %conv1 = trunc i64 %b to i32
18315 // %shr = lshr i64 %a, 32
18316 // %conv2 = trunc i64 %shr to i32
18317 // %shr3 = lshr i64 %b, 32
18318 // %conv4 = trunc i64 %shr3 to i32
18319 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18320 // %conv5 = zext i32 %0 to i64
18321 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18322 // %conv614 = zext i32 %1 to i64
18323 // %shl = shl nuw i64 %conv614, 32
18324 // %or = or i64 %shl, %conv5
18325 // ret i64 %or
18326 Function *F =
18327 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18328 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18329 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18330 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18331 Value *ArgOneHi =
18332 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18333 Value *ArgTwoHi =
18334 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18335 Value *ResLo = Builder.CreateZExt(
18336 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18337 Value *ResHiShift = Builder.CreateZExt(
18338 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18339 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18340 return Builder.CreateOr(ResLo, ResHi);
18341 }
18342 // Copy sign
18343 case PPC::BI__builtin_vsx_xvcpsgnsp:
18344 case PPC::BI__builtin_vsx_xvcpsgndp: {
18345 llvm::Type *ResultType = ConvertType(E->getType());
18346 Value *X = EmitScalarExpr(E->getArg(0));
18347 Value *Y = EmitScalarExpr(E->getArg(1));
18348 ID = Intrinsic::copysign;
18349 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18350 return Builder.CreateCall(F, {X, Y});
18351 }
18352 // Rounding/truncation
18353 case PPC::BI__builtin_vsx_xvrspip:
18354 case PPC::BI__builtin_vsx_xvrdpip:
18355 case PPC::BI__builtin_vsx_xvrdpim:
18356 case PPC::BI__builtin_vsx_xvrspim:
18357 case PPC::BI__builtin_vsx_xvrdpi:
18358 case PPC::BI__builtin_vsx_xvrspi:
18359 case PPC::BI__builtin_vsx_xvrdpic:
18360 case PPC::BI__builtin_vsx_xvrspic:
18361 case PPC::BI__builtin_vsx_xvrdpiz:
18362 case PPC::BI__builtin_vsx_xvrspiz: {
18363 llvm::Type *ResultType = ConvertType(E->getType());
18364 Value *X = EmitScalarExpr(E->getArg(0));
18365 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18366 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18367 ID = Builder.getIsFPConstrained()
18368 ? Intrinsic::experimental_constrained_floor
18369 : Intrinsic::floor;
18370 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18371 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18372 ID = Builder.getIsFPConstrained()
18373 ? Intrinsic::experimental_constrained_round
18374 : Intrinsic::round;
18375 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18376 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18377 ID = Builder.getIsFPConstrained()
18378 ? Intrinsic::experimental_constrained_rint
18379 : Intrinsic::rint;
18380 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18381 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18382 ID = Builder.getIsFPConstrained()
18383 ? Intrinsic::experimental_constrained_ceil
18384 : Intrinsic::ceil;
18385 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18386 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18387 ID = Builder.getIsFPConstrained()
18388 ? Intrinsic::experimental_constrained_trunc
18389 : Intrinsic::trunc;
18390 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18391 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18392 : Builder.CreateCall(F, X);
18393 }
18394
18395 // Absolute value
18396 case PPC::BI__builtin_vsx_xvabsdp:
18397 case PPC::BI__builtin_vsx_xvabssp: {
18398 llvm::Type *ResultType = ConvertType(E->getType());
18399 Value *X = EmitScalarExpr(E->getArg(0));
18400 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18401 return Builder.CreateCall(F, X);
18402 }
18403
18404 // Fastmath by default
18405 case PPC::BI__builtin_ppc_recipdivf:
18406 case PPC::BI__builtin_ppc_recipdivd:
18407 case PPC::BI__builtin_ppc_rsqrtf:
18408 case PPC::BI__builtin_ppc_rsqrtd: {
18409 FastMathFlags FMF = Builder.getFastMathFlags();
18410 Builder.getFastMathFlags().setFast();
18411 llvm::Type *ResultType = ConvertType(E->getType());
18412 Value *X = EmitScalarExpr(E->getArg(0));
18413
18414 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18415 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18416 Value *Y = EmitScalarExpr(E->getArg(1));
18417 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18418 Builder.getFastMathFlags() &= (FMF);
18419 return FDiv;
18420 }
18421 auto *One = ConstantFP::get(ResultType, 1.0);
18422 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18423 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18424 Builder.getFastMathFlags() &= (FMF);
18425 return FDiv;
18426 }
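  // Sketch of the expansions above, which run with the builder's fast-math
  // flags temporarily forced to 'fast' and restored before returning:
  //   __builtin_ppc_recipdiv*(a, b) -> fdiv fast a, b
  //   __builtin_ppc_rsqrt*(x)       -> fdiv fast 1.0, call @llvm.sqrt(x)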
18427 case PPC::BI__builtin_ppc_alignx: {
18428 Value *Op0 = EmitScalarExpr(E->getArg(0));
18429 Value *Op1 = EmitScalarExpr(E->getArg(1));
18430 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18431 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18432 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18433 llvm::Value::MaximumAlignment);
18434
18435 emitAlignmentAssumption(Op1, E->getArg(1),
18436 /*The expr loc is sufficient.*/ SourceLocation(),
18437 AlignmentCI, nullptr);
18438 return Op1;
18439 }
18440 case PPC::BI__builtin_ppc_rdlam: {
18441 Value *Op0 = EmitScalarExpr(E->getArg(0));
18442 Value *Op1 = EmitScalarExpr(E->getArg(1));
18443 Value *Op2 = EmitScalarExpr(E->getArg(2));
18444 llvm::Type *Ty = Op0->getType();
18445 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18446 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18447 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18448 return Builder.CreateAnd(Rotate, Op2);
18449 }
18450 case PPC::BI__builtin_ppc_load2r: {
18451 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18452 Value *Op0 = EmitScalarExpr(E->getArg(0));
18453 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18454 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18455 }
18456 // FMA variations
18457 case PPC::BI__builtin_ppc_fnmsub:
18458 case PPC::BI__builtin_ppc_fnmsubs:
18459 case PPC::BI__builtin_vsx_xvmaddadp:
18460 case PPC::BI__builtin_vsx_xvmaddasp:
18461 case PPC::BI__builtin_vsx_xvnmaddadp:
18462 case PPC::BI__builtin_vsx_xvnmaddasp:
18463 case PPC::BI__builtin_vsx_xvmsubadp:
18464 case PPC::BI__builtin_vsx_xvmsubasp:
18465 case PPC::BI__builtin_vsx_xvnmsubadp:
18466 case PPC::BI__builtin_vsx_xvnmsubasp: {
18467 llvm::Type *ResultType = ConvertType(E->getType());
18468 Value *X = EmitScalarExpr(E->getArg(0));
18469 Value *Y = EmitScalarExpr(E->getArg(1));
18470 Value *Z = EmitScalarExpr(E->getArg(2));
18471 llvm::Function *F;
18472 if (Builder.getIsFPConstrained())
18473 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18474 else
18475 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18476 switch (BuiltinID) {
18477 case PPC::BI__builtin_vsx_xvmaddadp:
18478 case PPC::BI__builtin_vsx_xvmaddasp:
18479 if (Builder.getIsFPConstrained())
18480 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18481 else
18482 return Builder.CreateCall(F, {X, Y, Z});
18483 case PPC::BI__builtin_vsx_xvnmaddadp:
18484 case PPC::BI__builtin_vsx_xvnmaddasp:
18485 if (Builder.getIsFPConstrained())
18486 return Builder.CreateFNeg(
18487 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18488 else
18489 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18490 case PPC::BI__builtin_vsx_xvmsubadp:
18491 case PPC::BI__builtin_vsx_xvmsubasp:
18492 if (Builder.getIsFPConstrained())
18493 return Builder.CreateConstrainedFPCall(
18494 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18495 else
18496 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18497 case PPC::BI__builtin_ppc_fnmsub:
18498 case PPC::BI__builtin_ppc_fnmsubs:
18499 case PPC::BI__builtin_vsx_xvnmsubadp:
18500 case PPC::BI__builtin_vsx_xvnmsubasp:
18501 if (Builder.getIsFPConstrained())
18502 return Builder.CreateFNeg(
18503 Builder.CreateConstrainedFPCall(
18504 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18505 "neg");
18506 else
18507 return Builder.CreateCall(
18508 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18509 }
18510 llvm_unreachable("Unknown FMA operation");
18511 return nullptr; // Suppress no-return warning
18512 }
18513
18514 case PPC::BI__builtin_vsx_insertword: {
18515 Value *Op0 = EmitScalarExpr(E->getArg(0));
18516 Value *Op1 = EmitScalarExpr(E->getArg(1));
18517 Value *Op2 = EmitScalarExpr(E->getArg(2));
18518 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18519
18520    // Third argument is a compile time constant int. It must be clamped to
18521    // the range [0, 12].
18522 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18523 assert(ArgCI &&
18524 "Third arg to xxinsertw intrinsic must be constant integer");
18525 const int64_t MaxIndex = 12;
18526 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18527
18528    // The builtin semantics don't exactly match the xxinsertw instruction's
18529 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18530 // word from the first argument, and inserts it in the second argument. The
18531 // instruction extracts the word from its second input register and inserts
18532 // it into its first input register, so swap the first and second arguments.
18533 std::swap(Op0, Op1);
18534
18535 // Need to cast the second argument from a vector of unsigned int to a
18536 // vector of long long.
18537 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18538
18539 if (getTarget().isLittleEndian()) {
18540 // Reverse the double words in the vector we will extract from.
18541 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18542 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18543
18544 // Reverse the index.
18545 Index = MaxIndex - Index;
18546 }
18547
18548 // Intrinsic expects the first arg to be a vector of int.
18549 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18550 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18551 return Builder.CreateCall(F, {Op0, Op1, Op2});
18552 }
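  // Worked example of the little-endian handling above: an index of 4 becomes
  // 12 - 4 = 8, and the doublewords of the source vector are swapped first,
  // so the same __builtin_vsx_insertword call selects the same logical word
  // on both endiannesses.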
18553
18554 case PPC::BI__builtin_vsx_extractuword: {
18555 Value *Op0 = EmitScalarExpr(E->getArg(0));
18556 Value *Op1 = EmitScalarExpr(E->getArg(1));
18557 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18558
18559 // Intrinsic expects the first argument to be a vector of doublewords.
18560 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18561
18562 // The second argument is a compile time constant int that needs to
18563 // be clamped to the range [0, 12].
18564 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18565 assert(ArgCI &&
18566 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18567 const int64_t MaxIndex = 12;
18568 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18569
18570 if (getTarget().isLittleEndian()) {
18571 // Reverse the index.
18572 Index = MaxIndex - Index;
18573 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18574
18575 // Emit the call, then reverse the double words of the results vector.
18576 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18577
18578 Value *ShuffleCall =
18579 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18580 return ShuffleCall;
18581 } else {
18582 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18583 return Builder.CreateCall(F, {Op0, Op1});
18584 }
18585 }
18586
18587 case PPC::BI__builtin_vsx_xxpermdi: {
18588 Value *Op0 = EmitScalarExpr(E->getArg(0));
18589 Value *Op1 = EmitScalarExpr(E->getArg(1));
18590 Value *Op2 = EmitScalarExpr(E->getArg(2));
18591 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18592 assert(ArgCI && "Third arg must be constant integer!");
18593
18594 unsigned Index = ArgCI->getZExtValue();
18595 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18596 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18597
18598 // Account for endianness by treating this as just a shuffle. So we use the
18599 // same indices for both LE and BE in order to produce expected results in
18600 // both cases.
18601 int ElemIdx0 = (Index & 2) >> 1;
18602 int ElemIdx1 = 2 + (Index & 1);
18603
18604 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18605 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18606 QualType BIRetType = E->getType();
18607 auto RetTy = ConvertType(BIRetType);
18608 return Builder.CreateBitCast(ShuffleCall, RetTy);
18609 }
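  // Worked example: an xxpermdi immediate of 2 (binary 10) gives
  //   ElemIdx0 = (2 & 2) >> 1 = 1 and ElemIdx1 = 2 + (2 & 1) = 2,
  // i.e. a shufflevector mask of <1, 2> over the two v2i64 operands, and the
  // same mask is used on LE and BE to match the instruction's result.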
18610
18611 case PPC::BI__builtin_vsx_xxsldwi: {
18612 Value *Op0 = EmitScalarExpr(E->getArg(0));
18613 Value *Op1 = EmitScalarExpr(E->getArg(1));
18614 Value *Op2 = EmitScalarExpr(E->getArg(2));
18615 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18616 assert(ArgCI && "Third argument must be a compile time constant");
18617 unsigned Index = ArgCI->getZExtValue() & 0x3;
18618 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18619 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18620
18621 // Create a shuffle mask
18622 int ElemIdx0;
18623 int ElemIdx1;
18624 int ElemIdx2;
18625 int ElemIdx3;
18626 if (getTarget().isLittleEndian()) {
18627 // Little endian element N comes from element 8+N-Index of the
18628 // concatenated wide vector (of course, using modulo arithmetic on
18629 // the total number of elements).
18630 ElemIdx0 = (8 - Index) % 8;
18631 ElemIdx1 = (9 - Index) % 8;
18632 ElemIdx2 = (10 - Index) % 8;
18633 ElemIdx3 = (11 - Index) % 8;
18634 } else {
18635 // Big endian ElemIdx<N> = Index + N
18636 ElemIdx0 = Index;
18637 ElemIdx1 = Index + 1;
18638 ElemIdx2 = Index + 2;
18639 ElemIdx3 = Index + 3;
18640 }
18641
18642 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18643 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18644 QualType BIRetType = E->getType();
18645 auto RetTy = ConvertType(BIRetType);
18646 return Builder.CreateBitCast(ShuffleCall, RetTy);
18647 }
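  // Worked example: a shift count of 1 selects words {1, 2, 3, 4} of the
  // concatenated operands on big-endian, and {7, 0, 1, 2} on little-endian
  // (the modulo-8 formula above), before bitcasting back to the builtin's
  // return type.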
18648
18649 case PPC::BI__builtin_pack_vector_int128: {
18650 Value *Op0 = EmitScalarExpr(E->getArg(0));
18651 Value *Op1 = EmitScalarExpr(E->getArg(1));
18652 bool isLittleEndian = getTarget().isLittleEndian();
18653 Value *PoisonValue =
18654 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18655 Value *Res = Builder.CreateInsertElement(
18656 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18657 Res = Builder.CreateInsertElement(Res, Op1,
18658 (uint64_t)(isLittleEndian ? 0 : 1));
18659 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18660 }
18661
18662 case PPC::BI__builtin_unpack_vector_int128: {
18663 Value *Op0 = EmitScalarExpr(E->getArg(0));
18664 Value *Op1 = EmitScalarExpr(E->getArg(1));
18665 ConstantInt *Index = cast<ConstantInt>(Op1);
18666 Value *Unpacked = Builder.CreateBitCast(
18667 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18668
18669 if (getTarget().isLittleEndian())
18670 Index =
18671 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18672
18673 return Builder.CreateExtractElement(Unpacked, Index);
18674 }
18675
18676 case PPC::BI__builtin_ppc_sthcx: {
18677 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18678 Value *Op0 = EmitScalarExpr(E->getArg(0));
18679 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18680 return Builder.CreateCall(F, {Op0, Op1});
18681 }
18682
18683 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18684 // Some of the MMA instructions accumulate their result into an existing
18685 // accumulator whereas the others generate a new accumulator. So we need to
18686 // use custom code generation to expand a builtin call with a pointer to a
18687 // load (if the corresponding instruction accumulates its result) followed by
18688 // the call to the intrinsic and a store of the result.
18689#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18690 case PPC::BI__builtin_##Name:
18691#include "clang/Basic/BuiltinsPPC.def"
18692 {
18693    SmallVector<Value *, 4> Ops;
18694    for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18695 if (E->getArg(i)->getType()->isArrayType())
18696 Ops.push_back(
18697 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18698 else
18699 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18700    // The first argument of these builtins is a pointer used to store their
18701 // result. However, the llvm intrinsics return their result in multiple
18702 // return values. So, here we emit code extracting these values from the
18703 // intrinsic results and storing them using that pointer.
18704 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18705 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18706 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18707 unsigned NumVecs = 2;
18708 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18709 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18710 NumVecs = 4;
18711 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18712 }
18713 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18714 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18715 Value *Vec = Builder.CreateLoad(Addr);
18716 Value *Call = Builder.CreateCall(F, {Vec});
18717 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18718 Value *Ptr = Ops[0];
18719 for (unsigned i=0; i<NumVecs; i++) {
18720 Value *Vec = Builder.CreateExtractValue(Call, i);
18721 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18722 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18723 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18724 }
18725 return Call;
18726 }
18727 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18728 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18729 // Reverse the order of the operands for LE, so the
18730 // same builtin call can be used on both LE and BE
18731 // without the need for the programmer to swap operands.
18732      // The operands are reversed starting from the second argument;
18733      // the first operand is the pointer to the pair/accumulator
18734      // that is being built.
18735 if (getTarget().isLittleEndian())
18736 std::reverse(Ops.begin() + 1, Ops.end());
18737 }
18738 bool Accumulate;
18739 switch (BuiltinID) {
18740 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18741 case PPC::BI__builtin_##Name: \
18742 ID = Intrinsic::ppc_##Intr; \
18743 Accumulate = Acc; \
18744 break;
18745 #include "clang/Basic/BuiltinsPPC.def"
18746 }
18747 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18748 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18749 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18750 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18751 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18752 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18753 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18754 } else {
18755 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18756 }
18757 Ops.pop_back();
18758 llvm::Function *F = CGM.getIntrinsic(ID);
18759 return Builder.CreateCall(F, Ops, "");
18760 }
18761 SmallVector<Value*, 4> CallOps;
18762 if (Accumulate) {
18763 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18764 Value *Acc = Builder.CreateLoad(Addr);
18765 CallOps.push_back(Acc);
18766 }
18767 for (unsigned i=1; i<Ops.size(); i++)
18768 CallOps.push_back(Ops[i]);
18769 llvm::Function *F = CGM.getIntrinsic(ID);
18770 Value *Call = Builder.CreateCall(F, CallOps);
18771 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18772 }
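  // Sketch of the accumulating MMA path (types per the PPC MMA representation,
  // where a __vector_quad is a <512 x i1> and a __vector_pair is a <256 x i1>;
  // <intr> is a placeholder for the selected intrinsic):
  //   %acc0 = load <512 x i1>, ptr %accp
  //   %acc1 = call <512 x i1> @llvm.ppc.mma.<intr>(<512 x i1> %acc0, ...)
  //   store <512 x i1> %acc1, ptr %accp
  // Non-accumulating builtins skip the initial load, and the disassemble and
  // lxvp/stxvp forms are special-cased above.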
18773
18774 case PPC::BI__builtin_ppc_compare_and_swap:
18775 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18776 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18777 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18778 Value *OldVal = Builder.CreateLoad(OldValAddr);
18779 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18780 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18781 Value *Op2 = EmitScalarExpr(E->getArg(2));
18782 auto Pair = EmitAtomicCompareExchange(
18783 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18784 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18785 // Unlike c11's atomic_compare_exchange, according to
18786 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18787 // > In either case, the contents of the memory location specified by addr
18788 // > are copied into the memory location specified by old_val_addr.
18789    // But it does not specify whether the store to OldValAddr is atomic or
18790    // which memory order to use. Following XL's codegen, treat it as a
18791    // normal store.
18792 Value *LoadedVal = Pair.first.getScalarVal();
18793 Builder.CreateStore(LoadedVal, OldValAddr);
18794 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18795 }
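  // Sketch of the emitted sequence (the cmpxchg is weak and monotonic):
  //   %res = cmpxchg weak ptr %addr, i32 %expected, i32 %new monotonic monotonic
  //   store i32 <loaded value of %res>, ptr %old_val_addr
  //   %ret = zext i1 <success bit of %res> to i32
  // The 'lp' variant does the same with long-sized operands.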
18796 case PPC::BI__builtin_ppc_fetch_and_add:
18797 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18798 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18799 llvm::AtomicOrdering::Monotonic);
18800 }
18801 case PPC::BI__builtin_ppc_fetch_and_and:
18802 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18803 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18804 llvm::AtomicOrdering::Monotonic);
18805 }
18806
18807 case PPC::BI__builtin_ppc_fetch_and_or:
18808 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18809 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18810 llvm::AtomicOrdering::Monotonic);
18811 }
18812 case PPC::BI__builtin_ppc_fetch_and_swap:
18813 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18814 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18815 llvm::AtomicOrdering::Monotonic);
18816 }
18817 case PPC::BI__builtin_ppc_ldarx:
18818 case PPC::BI__builtin_ppc_lwarx:
18819 case PPC::BI__builtin_ppc_lharx:
18820 case PPC::BI__builtin_ppc_lbarx:
18821 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18822 case PPC::BI__builtin_ppc_mfspr: {
18823 Value *Op0 = EmitScalarExpr(E->getArg(0));
18824 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18825 ? Int32Ty
18826 : Int64Ty;
18827 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18828 return Builder.CreateCall(F, {Op0});
18829 }
18830 case PPC::BI__builtin_ppc_mtspr: {
18831 Value *Op0 = EmitScalarExpr(E->getArg(0));
18832 Value *Op1 = EmitScalarExpr(E->getArg(1));
18833 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18834 ? Int32Ty
18835 : Int64Ty;
18836 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18837 return Builder.CreateCall(F, {Op0, Op1});
18838 }
18839 case PPC::BI__builtin_ppc_popcntb: {
18840 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18841 llvm::Type *ArgType = ArgValue->getType();
18842 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18843 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18844 }
18845 case PPC::BI__builtin_ppc_mtfsf: {
18846 // The builtin takes a uint32 that needs to be cast to an
18847 // f64 to be passed to the intrinsic.
18848 Value *Op0 = EmitScalarExpr(E->getArg(0));
18849 Value *Op1 = EmitScalarExpr(E->getArg(1));
18850 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18851 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18852 return Builder.CreateCall(F, {Op0, Cast}, "");
18853 }
18854
18855 case PPC::BI__builtin_ppc_swdiv_nochk:
18856 case PPC::BI__builtin_ppc_swdivs_nochk: {
18857 Value *Op0 = EmitScalarExpr(E->getArg(0));
18858 Value *Op1 = EmitScalarExpr(E->getArg(1));
18859 FastMathFlags FMF = Builder.getFastMathFlags();
18860 Builder.getFastMathFlags().setFast();
18861 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18862 Builder.getFastMathFlags() &= (FMF);
18863 return FDiv;
18864 }
18865 case PPC::BI__builtin_ppc_fric:
18866    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18867                           *this, E, Intrinsic::rint,
18868 Intrinsic::experimental_constrained_rint))
18869 .getScalarVal();
18870 case PPC::BI__builtin_ppc_frim:
18871 case PPC::BI__builtin_ppc_frims:
18872    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18873                           *this, E, Intrinsic::floor,
18874 Intrinsic::experimental_constrained_floor))
18875 .getScalarVal();
18876 case PPC::BI__builtin_ppc_frin:
18877 case PPC::BI__builtin_ppc_frins:
18878    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18879                           *this, E, Intrinsic::round,
18880 Intrinsic::experimental_constrained_round))
18881 .getScalarVal();
18882 case PPC::BI__builtin_ppc_frip:
18883 case PPC::BI__builtin_ppc_frips:
18884    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18885                           *this, E, Intrinsic::ceil,
18886 Intrinsic::experimental_constrained_ceil))
18887 .getScalarVal();
18888 case PPC::BI__builtin_ppc_friz:
18889 case PPC::BI__builtin_ppc_frizs:
18890    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18891                           *this, E, Intrinsic::trunc,
18892 Intrinsic::experimental_constrained_trunc))
18893 .getScalarVal();
18894 case PPC::BI__builtin_ppc_fsqrt:
18895 case PPC::BI__builtin_ppc_fsqrts:
18896    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18897                           *this, E, Intrinsic::sqrt,
18898 Intrinsic::experimental_constrained_sqrt))
18899 .getScalarVal();
18900 case PPC::BI__builtin_ppc_test_data_class: {
18901 Value *Op0 = EmitScalarExpr(E->getArg(0));
18902 Value *Op1 = EmitScalarExpr(E->getArg(1));
18903 return Builder.CreateCall(
18904 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18905 {Op0, Op1}, "test_data_class");
18906 }
18907 case PPC::BI__builtin_ppc_maxfe: {
18908 Value *Op0 = EmitScalarExpr(E->getArg(0));
18909 Value *Op1 = EmitScalarExpr(E->getArg(1));
18910 Value *Op2 = EmitScalarExpr(E->getArg(2));
18911 Value *Op3 = EmitScalarExpr(E->getArg(3));
18912 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18913 {Op0, Op1, Op2, Op3});
18914 }
18915 case PPC::BI__builtin_ppc_maxfl: {
18916 Value *Op0 = EmitScalarExpr(E->getArg(0));
18917 Value *Op1 = EmitScalarExpr(E->getArg(1));
18918 Value *Op2 = EmitScalarExpr(E->getArg(2));
18919 Value *Op3 = EmitScalarExpr(E->getArg(3));
18920 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18921 {Op0, Op1, Op2, Op3});
18922 }
18923 case PPC::BI__builtin_ppc_maxfs: {
18924 Value *Op0 = EmitScalarExpr(E->getArg(0));
18925 Value *Op1 = EmitScalarExpr(E->getArg(1));
18926 Value *Op2 = EmitScalarExpr(E->getArg(2));
18927 Value *Op3 = EmitScalarExpr(E->getArg(3));
18928 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18929 {Op0, Op1, Op2, Op3});
18930 }
18931 case PPC::BI__builtin_ppc_minfe: {
18932 Value *Op0 = EmitScalarExpr(E->getArg(0));
18933 Value *Op1 = EmitScalarExpr(E->getArg(1));
18934 Value *Op2 = EmitScalarExpr(E->getArg(2));
18935 Value *Op3 = EmitScalarExpr(E->getArg(3));
18936 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18937 {Op0, Op1, Op2, Op3});
18938 }
18939 case PPC::BI__builtin_ppc_minfl: {
18940 Value *Op0 = EmitScalarExpr(E->getArg(0));
18941 Value *Op1 = EmitScalarExpr(E->getArg(1));
18942 Value *Op2 = EmitScalarExpr(E->getArg(2));
18943 Value *Op3 = EmitScalarExpr(E->getArg(3));
18944 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18945 {Op0, Op1, Op2, Op3});
18946 }
18947 case PPC::BI__builtin_ppc_minfs: {
18948 Value *Op0 = EmitScalarExpr(E->getArg(0));
18949 Value *Op1 = EmitScalarExpr(E->getArg(1));
18950 Value *Op2 = EmitScalarExpr(E->getArg(2));
18951 Value *Op3 = EmitScalarExpr(E->getArg(3));
18952 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18953 {Op0, Op1, Op2, Op3});
18954 }
18955 case PPC::BI__builtin_ppc_swdiv:
18956 case PPC::BI__builtin_ppc_swdivs: {
18957 Value *Op0 = EmitScalarExpr(E->getArg(0));
18958 Value *Op1 = EmitScalarExpr(E->getArg(1));
18959 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18960 }
18961 case PPC::BI__builtin_ppc_set_fpscr_rn:
18962 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18963 {EmitScalarExpr(E->getArg(0))});
18964 case PPC::BI__builtin_ppc_mffs:
18965 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18966 }
18967}
18968
18969namespace {
18970// If \p E is not a null pointer, insert an address space cast to match the
18971// return type of \p E if necessary.
18972Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18973 const CallExpr *E = nullptr) {
18974 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18975 auto *Call = CGF.Builder.CreateCall(F);
18976 Call->addRetAttr(
18977 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18978 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18979 if (!E)
18980 return Call;
18981 QualType BuiltinRetType = E->getType();
18982 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18983 if (RetTy == Call->getType())
18984 return Call;
18985 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18986}
18987
18988Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18989 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18990 auto *Call = CGF.Builder.CreateCall(F);
18991 Call->addRetAttr(
18992 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18993 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18994 return Call;
18995}
18996
18997// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18998/// Emit code based on Code Object ABI version.
18999/// COV_4 : Emit code to use dispatch ptr
19000/// COV_5+ : Emit code to use implicitarg ptr
19001/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
19002/// and use its value to select the COV_4 or COV_5+ approach. It is used for
19003/// compiling device libraries in an ABI-agnostic way.
19004///
19005/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
19006/// clang during compilation of user code.
19007Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
19008 llvm::LoadInst *LD;
19009
19010 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
19011
19012 if (Cov == CodeObjectVersionKind::COV_None) {
19013 StringRef Name = "__oclc_ABI_version";
19014 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
19015 if (!ABIVersionC)
19016 ABIVersionC = new llvm::GlobalVariable(
19017 CGF.CGM.getModule(), CGF.Int32Ty, false,
19018 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
19019 llvm::GlobalVariable::NotThreadLocal,
19020          CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
19021
19022 // This load will be eliminated by the IPSCCP because it is constant
19023 // weak_odr without externally_initialized. Either changing it to weak or
19024 // adding externally_initialized will keep the load.
19025 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
19026 CGF.CGM.getIntAlign());
19027
19028 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
19029 ABIVersion,
19030 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
19031
19032 // Indexing the implicit kernarg segment.
19033 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
19034 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19035
19036 // Indexing the HSA kernel_dispatch_packet struct.
19037 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
19038 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19039
19040 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
19041    LD = CGF.Builder.CreateLoad(
19042        Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19043 } else {
19044 Value *GEP = nullptr;
19045 if (Cov >= CodeObjectVersionKind::COV_5) {
19046 // Indexing the implicit kernarg segment.
19047 GEP = CGF.Builder.CreateConstGEP1_32(
19048 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19049 } else {
19050 // Indexing the HSA kernel_dispatch_packet struct.
19051 GEP = CGF.Builder.CreateConstGEP1_32(
19052 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19053 }
19054    LD = CGF.Builder.CreateLoad(
19055        Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19056 }
19057
19058 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
19059 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
19060 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19061 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
19062 LD->setMetadata(llvm::LLVMContext::MD_noundef,
19063 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19064 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19065 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19066 return LD;
19067}
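// Sketch of the COV_5+ path for the x dimension (Index == 0): the workgroup
// size is a 16-bit field at byte offset 12 of the implicit kernarg segment,
// so the emitted IR is roughly
//   %p = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
//   %g = getelementptr i8, ptr addrspace(4) %p, i32 12
//   %s = load i16, ptr addrspace(4) %g  ; !range [1, MaxWGSize], !invariant.load
// The COV_4 path reads the same field from the kernel_dispatch_packet at byte
// offset 4 + Index * 2 instead.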
19068
19069// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
19070Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
19071 const unsigned XOffset = 12;
19072 auto *DP = EmitAMDGPUDispatchPtr(CGF);
19073 // Indexing the HSA kernel_dispatch_packet struct.
19074 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
19075 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
19076 auto *LD = CGF.Builder.CreateLoad(
19077      Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
19078
19079 llvm::MDBuilder MDB(CGF.getLLVMContext());
19080
19081 // Known non-zero.
19082 LD->setMetadata(llvm::LLVMContext::MD_range,
19083 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
19084 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19085 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19086 return LD;
19087}
19088} // namespace
19089
19090// For processing memory ordering and memory scope arguments of various
19091// amdgcn builtins.
19092// \p Order takes a C++11 compatible memory-ordering specifier and converts
19093// it into LLVM's memory ordering specifier using atomic C ABI, and writes
19094// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
19095// specific SyncScopeID and writes it to \p SSID.
19096void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
19097                                              llvm::AtomicOrdering &AO,
19098 llvm::SyncScope::ID &SSID) {
19099 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
19100
19101 // Map C11/C++11 memory ordering to LLVM memory ordering
19102 assert(llvm::isValidAtomicOrderingCABI(ord));
19103 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
19104 case llvm::AtomicOrderingCABI::acquire:
19105 case llvm::AtomicOrderingCABI::consume:
19106 AO = llvm::AtomicOrdering::Acquire;
19107 break;
19108 case llvm::AtomicOrderingCABI::release:
19109 AO = llvm::AtomicOrdering::Release;
19110 break;
19111 case llvm::AtomicOrderingCABI::acq_rel:
19112 AO = llvm::AtomicOrdering::AcquireRelease;
19113 break;
19114 case llvm::AtomicOrderingCABI::seq_cst:
19115 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19116 break;
19117 case llvm::AtomicOrderingCABI::relaxed:
19118 AO = llvm::AtomicOrdering::Monotonic;
19119 break;
19120 }
19121
19122 // Some of the atomic builtins take the scope as a string name.
19123 StringRef scp;
19124 if (llvm::getConstantStringInfo(Scope, scp)) {
19125 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19126 return;
19127 }
19128
19129 // Older builtins had an enum argument for the memory scope.
19130 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19131 switch (scope) {
19132 case 0: // __MEMORY_SCOPE_SYSTEM
19133 SSID = llvm::SyncScope::System;
19134 break;
19135 case 1: // __MEMORY_SCOPE_DEVICE
19136 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19137 break;
19138 case 2: // __MEMORY_SCOPE_WRKGRP
19139 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19140 break;
19141 case 3: // __MEMORY_SCOPE_WVFRNT
19142 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19143 break;
19144 case 4: // __MEMORY_SCOPE_SINGLE
19145 SSID = llvm::SyncScope::SingleThread;
19146 break;
19147 default:
19148 SSID = llvm::SyncScope::System;
19149 break;
19150 }
19151}
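// Usage sketch: for a call such as
//   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
// this maps the ordering to llvm::AtomicOrdering::Acquire and the scope
// string to the "workgroup" sync scope, yielding roughly
//   fence syncscope("workgroup") acquire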
19152
19153llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19154 unsigned Idx,
19155 const CallExpr *E) {
19156 llvm::Value *Arg = nullptr;
19157 if ((ICEArguments & (1 << Idx)) == 0) {
19158 Arg = EmitScalarExpr(E->getArg(Idx));
19159 } else {
19160 // If this is required to be a constant, constant fold it so that we
19161 // know that the generated intrinsic gets a ConstantInt.
19162 std::optional<llvm::APSInt> Result =
19163 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19164 assert(Result && "Expected argument to be a constant");
19165 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19166 }
19167 return Arg;
19168}
19169
19170// Return dot product intrinsic that corresponds to the QT scalar type
19171static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19172 if (QT->isFloatingType())
19173 return RT.getFDotIntrinsic();
19174 if (QT->isSignedIntegerType())
19175 return RT.getSDotIntrinsic();
19176 assert(QT->isUnsignedIntegerType());
19177 return RT.getUDotIntrinsic();
19178}
19179
19180static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19181  if (QT->hasSignedIntegerRepresentation()) {
19182    return RT.getFirstBitSHighIntrinsic();
19183  }
19184
19185  assert(QT->hasUnsignedIntegerRepresentation());
19186  return RT.getFirstBitUHighIntrinsic();
19187}
19188
19189Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19190                                            const CallExpr *E,
19191 ReturnValueSlot ReturnValue) {
19192 if (!getLangOpts().HLSL)
19193 return nullptr;
19194
19195 switch (BuiltinID) {
19196 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19197 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19198 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19199
19200 // TODO: Map to an hlsl_device address space.
19201 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19202
19203 return Builder.CreateIntrinsic(
19204 RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
19205 ArrayRef<Value *>{HandleOp, IndexOp});
19206 }
19207 case Builtin::BI__builtin_hlsl_all: {
19208 Value *Op0 = EmitScalarExpr(E->getArg(0));
19209 return Builder.CreateIntrinsic(
19210 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19211 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19212 "hlsl.all");
19213 }
19214 case Builtin::BI__builtin_hlsl_any: {
19215 Value *Op0 = EmitScalarExpr(E->getArg(0));
19216 return Builder.CreateIntrinsic(
19217 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19218 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19219 "hlsl.any");
19220 }
19221 case Builtin::BI__builtin_hlsl_asdouble:
19222 return handleAsDoubleBuiltin(*this, E);
19223 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19224 Value *OpX = EmitScalarExpr(E->getArg(0));
19225 Value *OpMin = EmitScalarExpr(E->getArg(1));
19226 Value *OpMax = EmitScalarExpr(E->getArg(2));
19227
19228 QualType Ty = E->getArg(0)->getType();
19229 if (auto *VecTy = Ty->getAs<VectorType>())
19230 Ty = VecTy->getElementType();
19231
19232 Intrinsic::ID Intr;
19233 if (Ty->isFloatingType()) {
19234 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19235 } else if (Ty->isUnsignedIntegerType()) {
19236 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19237 } else {
19238 assert(Ty->isSignedIntegerType());
19239 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19240 }
19241 return Builder.CreateIntrinsic(
19242 /*ReturnType=*/OpX->getType(), Intr,
19243 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19244 }
19245 case Builtin::BI__builtin_hlsl_cross: {
19246 Value *Op0 = EmitScalarExpr(E->getArg(0));
19247 Value *Op1 = EmitScalarExpr(E->getArg(1));
19248 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19249 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19250 "cross operands must have a float representation");
19251 // make sure each vector has exactly 3 elements
19252 assert(
19253 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19254 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19255 "input vectors must have 3 elements each");
19256 return Builder.CreateIntrinsic(
19257 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19258 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19259 }
19260 case Builtin::BI__builtin_hlsl_dot: {
19261 Value *Op0 = EmitScalarExpr(E->getArg(0));
19262 Value *Op1 = EmitScalarExpr(E->getArg(1));
19263 llvm::Type *T0 = Op0->getType();
19264 llvm::Type *T1 = Op1->getType();
19265
19266 // If the arguments are scalars, just emit a multiply
19267 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19268 if (T0->isFloatingPointTy())
19269 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19270
19271 if (T0->isIntegerTy())
19272 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19273
19274 llvm_unreachable(
19275 "Scalar dot product is only supported on ints and floats.");
19276 }
19277 // For vectors, validate types and emit the appropriate intrinsic
19278
19279 // A VectorSplat should have happened
19280 assert(T0->isVectorTy() && T1->isVectorTy() &&
19281 "Dot product of vector and scalar is not supported.");
19282
19283 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
19284 [[maybe_unused]] auto *VecTy1 =
19285 E->getArg(1)->getType()->getAs<VectorType>();
19286
19287 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19288 "Dot product of vectors need the same element types.");
19289
19290 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19291 "Dot product requires vectors to be of the same size.");
19292
19293 return Builder.CreateIntrinsic(
19294 /*ReturnType=*/T0->getScalarType(),
19295 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19296 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19297 }
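  // A rough sketch of the two paths above: a scalar call like dot(2.0f, 3.0f)
  // becomes a single fmul, while dot(float3, float3) becomes a call to the
  // target's fdot/sdot/udot intrinsic (selected by getDotProductIntrinsic)
  // that returns the scalar element type.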
19298 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19299 Value *A = EmitScalarExpr(E->getArg(0));
19300 Value *B = EmitScalarExpr(E->getArg(1));
19301 Value *C = EmitScalarExpr(E->getArg(2));
19302
19303 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19304 return Builder.CreateIntrinsic(
19305 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19306 "hlsl.dot4add.i8packed");
19307 }
19308 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19309 Value *A = EmitScalarExpr(E->getArg(0));
19310 Value *B = EmitScalarExpr(E->getArg(1));
19311 Value *C = EmitScalarExpr(E->getArg(2));
19312
19313 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19314 return Builder.CreateIntrinsic(
19315 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19316 "hlsl.dot4add.u8packed");
19317 }
19318 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19319
19320 Value *X = EmitScalarExpr(E->getArg(0));
19321
19322 return Builder.CreateIntrinsic(
19323        /*ReturnType=*/ConvertType(E->getType()),
19324        getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19325        ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19326 }
19327 case Builtin::BI__builtin_hlsl_lerp: {
19328 Value *X = EmitScalarExpr(E->getArg(0));
19329 Value *Y = EmitScalarExpr(E->getArg(1));
19330 Value *S = EmitScalarExpr(E->getArg(2));
19331 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19332 llvm_unreachable("lerp operand must have a float representation");
19333 return Builder.CreateIntrinsic(
19334 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19335 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19336 }
19337 case Builtin::BI__builtin_hlsl_length: {
19338 Value *X = EmitScalarExpr(E->getArg(0));
19339
19340 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19341 "length operand must have a float representation");
19342 // if the operand is a scalar, we can use the fabs llvm intrinsic directly
19343 if (!E->getArg(0)->getType()->isVectorType())
19344 return EmitFAbs(*this, X);
19345
19346 return Builder.CreateIntrinsic(
19347 /*ReturnType=*/X->getType()->getScalarType(),
19348 CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
19349 nullptr, "hlsl.length");
19350 }
19351 case Builtin::BI__builtin_hlsl_normalize: {
19352 Value *X = EmitScalarExpr(E->getArg(0));
19353
19354 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19355 "normalize operand must have a float representation");
19356
19357 return Builder.CreateIntrinsic(
19358 /*ReturnType=*/X->getType(),
19359 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19360 nullptr, "hlsl.normalize");
19361 }
19362 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19363 Value *X = EmitScalarExpr(E->getArg(0));
19364
19365 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19366 "degree operand must have a float representation");
19367
19368 return Builder.CreateIntrinsic(
19369 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19370 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19371 }
19372 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19373 Value *Op0 = EmitScalarExpr(E->getArg(0));
19374 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19375 llvm_unreachable("frac operand must have a float representation");
19376 return Builder.CreateIntrinsic(
19377 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19378 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19379  }
19380  case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19381    Value *Op0 = EmitScalarExpr(E->getArg(0));
19382    llvm::Type *Xty = Op0->getType();
19383    llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19384    if (Xty->isVectorTy()) {
19385      auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
19386      retType = llvm::VectorType::get(
19387          retType, ElementCount::getFixed(XVecTy->getNumElements()));
19388    }
19389    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19390      llvm_unreachable("isinf operand must have a float representation");
19391    return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19392                                   ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19393 }
19394 case Builtin::BI__builtin_hlsl_mad: {
19395 Value *M = EmitScalarExpr(E->getArg(0));
19396 Value *A = EmitScalarExpr(E->getArg(1));
19397 Value *B = EmitScalarExpr(E->getArg(2));
19398 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19399 return Builder.CreateIntrinsic(
19400 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19401 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19402
19403 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19404 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19405 return Builder.CreateIntrinsic(
19406 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19407 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19408
19409 Value *Mul = Builder.CreateNSWMul(M, A);
19410 return Builder.CreateNSWAdd(Mul, B);
19411 }
19412 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19413 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19414 return Builder.CreateIntrinsic(
19415 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19416 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19417
19418 Value *Mul = Builder.CreateNUWMul(M, A);
19419 return Builder.CreateNUWAdd(Mul, B);
19420 }
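  // In short: float mad lowers to llvm.fmuladd; integer mad lowers to
  // dx.imad/dx.umad on DXIL and to an nsw/nuw mul-and-add pair elsewhere.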
19421 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19422 Value *Op0 = EmitScalarExpr(E->getArg(0));
19423 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19424 llvm_unreachable("rcp operand must have a float representation");
19425 llvm::Type *Ty = Op0->getType();
19426 llvm::Type *EltTy = Ty->getScalarType();
19427 Constant *One = Ty->isVectorTy()
19428 ? ConstantVector::getSplat(
19429 ElementCount::getFixed(
19430 cast<FixedVectorType>(Ty)->getNumElements()),
19431 ConstantFP::get(EltTy, 1.0))
19432 : ConstantFP::get(EltTy, 1.0);
19433 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19434 }
19435 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19436 Value *Op0 = EmitScalarExpr(E->getArg(0));
19437 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19438 llvm_unreachable("rsqrt operand must have a float representation");
19439 return Builder.CreateIntrinsic(
19440 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19441 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19442 }
19443 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19444 Value *Op0 = EmitScalarExpr(E->getArg(0));
19445 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19446 "saturate operand must have a float representation");
19447 return Builder.CreateIntrinsic(
19448 /*ReturnType=*/Op0->getType(),
19449 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19450 nullptr, "hlsl.saturate");
19451 }
19452 case Builtin::BI__builtin_hlsl_select: {
19453 Value *OpCond = EmitScalarExpr(E->getArg(0));
19454 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19455 Value *OpTrue =
19456 RValTrue.isScalar()
19457 ? RValTrue.getScalarVal()
19458 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19459 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19460 Value *OpFalse =
19461 RValFalse.isScalar()
19462 ? RValFalse.getScalarVal()
19463 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19464
19465 Value *SelectVal =
19466 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19467 if (!RValTrue.isScalar())
19468 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19469 ReturnValue.isVolatile());
19470
19471 return SelectVal;
19472 }
19473 case Builtin::BI__builtin_hlsl_step: {
19474 Value *Op0 = EmitScalarExpr(E->getArg(0));
19475 Value *Op1 = EmitScalarExpr(E->getArg(1));
19476 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19477 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19478 "step operands must have a float representation");
19479 return Builder.CreateIntrinsic(
19480 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19481 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19482 }
19483 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19484 Value *Op = EmitScalarExpr(E->getArg(0));
19485 assert(Op->getType()->isIntegerTy(1) &&
19486 "Intrinsic WaveActiveAllTrue operand must be a bool");
19487
19488 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19489 return EmitRuntimeCall(
19490 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19491 }
19492 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19493 Value *Op = EmitScalarExpr(E->getArg(0));
19494 assert(Op->getType()->isIntegerTy(1) &&
19495 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19496
19497 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19498 return EmitRuntimeCall(
19499 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19500 }
19501 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19502 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19503 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19504 return EmitRuntimeCall(
19505 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19506 ArrayRef{OpExpr});
19507 }
19508 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19509    // We don't define a SPIR-V intrinsic; instead this is a SPIR-V built-in
19510    // defined in SPIRVBuiltins.td. So we manually select either the DirectX
19511    // intrinsic or the demangled SPIR-V builtin name.
19512 switch (CGM.getTarget().getTriple().getArch()) {
19513 case llvm::Triple::dxil:
19514 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19515 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19516    case llvm::Triple::spirv:
19517      return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19518        llvm::FunctionType::get(IntTy, {}, false),
19519 "__hlsl_wave_get_lane_index", {}, false, true));
19520 default:
19521 llvm_unreachable(
19522 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19523 }
19524 }
19525 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19526 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19527 return EmitRuntimeCall(
19528 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19529 }
19530 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19531    // Due to the use of variadic arguments we must explicitly retrieve them and
19532 // create our function type.
19533 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19534 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19535 llvm::FunctionType *FT = llvm::FunctionType::get(
19536 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19537 false);
19538
19539 // Get overloaded name
19540 std::string Name =
19541 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19542 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19543 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19544 /*Local=*/false,
19545 /*AssumeConvergent=*/true),
19546 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19547 }
19548 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19549 auto *Arg0 = E->getArg(0);
19550 Value *Op0 = EmitScalarExpr(Arg0);
19551 llvm::Type *Xty = Op0->getType();
19552 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19553 if (Xty->isVectorTy()) {
19554 auto *XVecTy = Arg0->getType()->getAs<VectorType>();
19555 retType = llvm::VectorType::get(
19556 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19557 }
19558 assert((Arg0->getType()->hasFloatingRepresentation() ||
19559 Arg0->getType()->hasIntegerRepresentation()) &&
19560 "sign operand must have a float or int representation");
19561
19562    if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19563      Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19564 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19565 ConstantInt::get(retType, 1), "hlsl.sign");
19566 }
19567
19568 return Builder.CreateIntrinsic(
19569 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19570 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19571 }
19572 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19573 Value *Op0 = EmitScalarExpr(E->getArg(0));
19574 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19575 "radians operand must have a float representation");
19576 return Builder.CreateIntrinsic(
19577 /*ReturnType=*/Op0->getType(),
19578 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19579 nullptr, "hlsl.radians");
19580 }
19581 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19582 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19583 Value *Offset = EmitScalarExpr(E->getArg(1));
19584 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19585 return Builder.CreateIntrinsic(
19586 /*ReturnType=*/Offset->getType(),
19587 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19588 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19589 }
19590 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19591
19592 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19593 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19594 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19595 "asuint operands types mismatch");
19596 return handleHlslSplitdouble(E, this);
19597 }
19598 case Builtin::BI__builtin_hlsl_elementwise_clip:
19599 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19600 "clip operands types mismatch");
19601 return handleHlslClip(E, this);
19602 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19603 Intrinsic::ID ID =
19604 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19605 return EmitRuntimeCall(
19606 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19607 }
19608 }
19609 return nullptr;
19610}
19611
19612void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19613 const CallExpr *E) {
19614 constexpr const char *Tag = "amdgpu-as";
19615
19616  LLVMContext &Ctx = Inst->getContext();
19617  SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19618  for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19619 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19620 StringRef AS;
19621 if (llvm::getConstantStringInfo(V, AS)) {
19622 MMRAs.push_back({Tag, AS});
19623 // TODO: Delete the resulting unused constant?
19624 continue;
19625 }
19626 CGM.Error(E->getExprLoc(),
19627 "expected an address space name as a string literal");
19628 }
19629
19630 llvm::sort(MMRAs);
19631 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19632 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19633}
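// Example of the effect, assuming the extra arguments are the string literals
// "local" and "global": the fence instruction receives !mmra metadata with the
// ("amdgpu-as", "local") and ("amdgpu-as", "global") tags, sorted and
// deduplicated; non-literal arguments are rejected with a diagnostic.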
19634
19635Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19636                                              const CallExpr *E) {
19637 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19638 llvm::SyncScope::ID SSID;
19639 switch (BuiltinID) {
19640 case AMDGPU::BI__builtin_amdgcn_div_scale:
19641 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19642 // Translate from the intrinsics's struct return to the builtin's out
19643 // argument.
19644
19645 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19646
19647 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19648 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19649 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19650
19651 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19652 X->getType());
19653
19654 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19655
19656 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19657 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19658
19659 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19660
19661 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19662 Builder.CreateStore(FlagExt, FlagOutPtr);
19663 return Result;
19664 }
19665 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19666 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19667 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19668 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19669 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19670 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19671
19672 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19673 Src0->getType());
19674 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19675 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19676 }
19677
19678 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19679 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19680 Intrinsic::amdgcn_ds_swizzle);
19681 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19682 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19683  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19684    llvm::SmallVector<llvm::Value *, 6> Args;
19685    // Find out if any arguments are required to be integer constant
19686    // expressions.
19687    unsigned ICEArguments = 0;
19688    ASTContext::GetBuiltinTypeError Error;
19689    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19690 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19691 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19692 unsigned Size = DataTy->getPrimitiveSizeInBits();
19693 llvm::Type *IntTy =
19694 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19695 Function *F =
19696 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19697 ? Intrinsic::amdgcn_mov_dpp8
19698 : Intrinsic::amdgcn_update_dpp,
19699 IntTy);
19700 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19701 E->getNumArgs() == 2);
19702 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19703 if (InsertOld)
19704 Args.push_back(llvm::PoisonValue::get(IntTy));
19705 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19706 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19707 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19708 Size < 32) {
19709 if (!DataTy->isIntegerTy())
19710 V = Builder.CreateBitCast(
19711 V, llvm::IntegerType::get(Builder.getContext(), Size));
19712 V = Builder.CreateZExtOrBitCast(V, IntTy);
19713 }
19714 llvm::Type *ExpTy =
19715 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19716 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19717 }
19718 Value *V = Builder.CreateCall(F, Args);
19719 if (Size < 32 && !DataTy->isIntegerTy())
19720 V = Builder.CreateTrunc(
19721 V, llvm::IntegerType::get(Builder.getContext(), Size));
19722 return Builder.CreateTruncOrBitCast(V, DataTy);
19723 }
19724 case AMDGPU::BI__builtin_amdgcn_permlane16:
19725 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19726 return emitBuiltinWithOneOverloadedType<6>(
19727 *this, E,
19728 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19729 ? Intrinsic::amdgcn_permlane16
19730 : Intrinsic::amdgcn_permlanex16);
19731 case AMDGPU::BI__builtin_amdgcn_permlane64:
19732 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19733 Intrinsic::amdgcn_permlane64);
19734 case AMDGPU::BI__builtin_amdgcn_readlane:
19735 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19736 Intrinsic::amdgcn_readlane);
19737 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19738 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19739 Intrinsic::amdgcn_readfirstlane);
19740 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19741 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19742 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19743 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19744 Intrinsic::amdgcn_div_fixup);
19745 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19746 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19747 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19748 case AMDGPU::BI__builtin_amdgcn_rcp:
19749 case AMDGPU::BI__builtin_amdgcn_rcpf:
19750 case AMDGPU::BI__builtin_amdgcn_rcph:
19751 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19752 case AMDGPU::BI__builtin_amdgcn_sqrt:
19753 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19754 case AMDGPU::BI__builtin_amdgcn_sqrth:
19755 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19756 Intrinsic::amdgcn_sqrt);
19757 case AMDGPU::BI__builtin_amdgcn_rsq:
19758 case AMDGPU::BI__builtin_amdgcn_rsqf:
19759 case AMDGPU::BI__builtin_amdgcn_rsqh:
19760 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19761 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19762 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19763 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19764 Intrinsic::amdgcn_rsq_clamp);
19765 case AMDGPU::BI__builtin_amdgcn_sinf:
19766 case AMDGPU::BI__builtin_amdgcn_sinh:
19767 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19768 case AMDGPU::BI__builtin_amdgcn_cosf:
19769 case AMDGPU::BI__builtin_amdgcn_cosh:
19770 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19771 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19772 return EmitAMDGPUDispatchPtr(*this, E);
19773 case AMDGPU::BI__builtin_amdgcn_logf:
19774 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19775 case AMDGPU::BI__builtin_amdgcn_exp2f:
19776 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19777 Intrinsic::amdgcn_exp2);
19778 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19779 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19780 Intrinsic::amdgcn_log_clamp);
19781 case AMDGPU::BI__builtin_amdgcn_ldexp:
19782 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19783 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19784 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19785 llvm::Function *F =
19786 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19787 return Builder.CreateCall(F, {Src0, Src1});
19788 }
19789 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19790 // The raw instruction has a different behavior for out of bounds exponent
19791 // values (implicit truncation instead of saturate to short_min/short_max).
19792 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19793 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19794 llvm::Function *F =
19795 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19796 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19797 }
19798 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19799 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19800 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19801 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19802 Intrinsic::amdgcn_frexp_mant);
19803 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19804 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19805 Value *Src0 = EmitScalarExpr(E->getArg(0));
19806 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19807 { Builder.getInt32Ty(), Src0->getType() });
19808 return Builder.CreateCall(F, Src0);
19809 }
19810 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19811 Value *Src0 = EmitScalarExpr(E->getArg(0));
19812 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19813 { Builder.getInt16Ty(), Src0->getType() });
19814 return Builder.CreateCall(F, Src0);
19815 }
19816 case AMDGPU::BI__builtin_amdgcn_fract:
19817 case AMDGPU::BI__builtin_amdgcn_fractf:
19818 case AMDGPU::BI__builtin_amdgcn_fracth:
19819 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19820 Intrinsic::amdgcn_fract);
19821 case AMDGPU::BI__builtin_amdgcn_lerp:
19822 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19823 Intrinsic::amdgcn_lerp);
19824 case AMDGPU::BI__builtin_amdgcn_ubfe:
19825 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19826 Intrinsic::amdgcn_ubfe);
19827 case AMDGPU::BI__builtin_amdgcn_sbfe:
19828 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19829 Intrinsic::amdgcn_sbfe);
19830 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19831 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19832 llvm::Type *ResultType = ConvertType(E->getType());
19833 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19834 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19835 return Builder.CreateCall(F, { Src });
19836 }
19837 case AMDGPU::BI__builtin_amdgcn_uicmp:
19838 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19839 case AMDGPU::BI__builtin_amdgcn_sicmp:
19840 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19841 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19842 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19843 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19844
19845 // FIXME-GFX10: How should 32 bit mask be handled?
19846 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19847 { Builder.getInt64Ty(), Src0->getType() });
19848 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19849 }
19850 case AMDGPU::BI__builtin_amdgcn_fcmp:
19851 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19852 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19853 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19854 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19855
19856 // FIXME-GFX10: How should 32 bit mask be handled?
19857 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19858 { Builder.getInt64Ty(), Src0->getType() });
19859 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19860 }
19861 case AMDGPU::BI__builtin_amdgcn_class:
19862 case AMDGPU::BI__builtin_amdgcn_classf:
19863 case AMDGPU::BI__builtin_amdgcn_classh:
19864 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19865 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19866 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19867 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19868 Intrinsic::amdgcn_fmed3);
19869 case AMDGPU::BI__builtin_amdgcn_ds_append:
19870 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19871 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19872 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19873 Value *Src0 = EmitScalarExpr(E->getArg(0));
19874 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19875 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19876 }
19877 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19878 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19879 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19880 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19881 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19882 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19883 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19884 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19885 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19886 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19887 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19888 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19889 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19890 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19891 Intrinsic::ID IID;
19892 switch (BuiltinID) {
19893 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19894 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19895 IID = Intrinsic::amdgcn_global_load_tr_b64;
19896 break;
19897 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19898 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19899 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19900 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19901 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19902 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19903 IID = Intrinsic::amdgcn_global_load_tr_b128;
19904 break;
19905 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19906 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19907 break;
19908 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19909 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19910 break;
19911 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19912 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19913 break;
19914 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19915 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19916 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19917 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19918 break;
19919 }
19920 llvm::Type *LoadTy = ConvertType(E->getType());
19921 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19922 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19923 return Builder.CreateCall(F, {Addr});
19924 }
19925 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19926 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19927 {llvm::Type::getInt64Ty(getLLVMContext())});
19928 return Builder.CreateCall(F);
19929 }
19930 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19931 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19932 {llvm::Type::getInt64Ty(getLLVMContext())});
19933 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19934 return Builder.CreateCall(F, {Env});
19935 }
19936 case AMDGPU::BI__builtin_amdgcn_read_exec:
19937 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19938 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19939 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19940 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19941 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19942 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19943 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19944 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19945 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19946 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19947 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19948 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19949 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19950 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19951 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19952
19953 // The builtins take these arguments as vec4 where the last element is
19954 // ignored. The intrinsic takes them as vec3.
19955 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19956 ArrayRef<int>{0, 1, 2});
19957 RayDir =
19958 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19959 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19960 ArrayRef<int>{0, 1, 2});
19961
19962 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19963 {NodePtr->getType(), RayDir->getType()});
19964 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19965 RayInverseDir, TextureDescr});
19966 }
19967
19968  case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19969    SmallVector<Value *, 4> Args;
19970    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19971 Args.push_back(EmitScalarExpr(E->getArg(i)));
19972
19973 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19974 Value *Call = Builder.CreateCall(F, Args);
19975 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19976 Value *A = Builder.CreateExtractValue(Call, 1);
19977 llvm::Type *RetTy = ConvertType(E->getType());
19978 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19979 (uint64_t)0);
19980 return Builder.CreateInsertElement(I0, A, 1);
19981 }
19982 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19983 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19984    llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
19985    Function *F = CGM.getIntrinsic(
19986 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19987 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19988 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
19989 {VT, VT});
19990
19991    SmallVector<Value *, 9> Args;
19992    for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
19993 Args.push_back(EmitScalarExpr(E->getArg(I)));
19994 return Builder.CreateCall(F, Args);
19995 }
19996 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19997 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19998 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19999 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20000 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20001 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20002 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20003 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20004 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20005 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20006 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20007 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20008 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20009 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20010 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20011 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20012 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20014 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20015 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20016 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20017 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20019 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20020 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20021 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20022 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20023 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20024 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20025 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20026 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20027 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20028 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20029 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20030 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20031 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20032 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20033 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20034 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20035 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20036 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20037 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20038 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20039 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20040 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20041 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20042 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20043 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20044 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20045 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20046 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20047 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20048 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20049 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20050 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20051 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20052 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20053 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20054 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20055 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
20056
20057 // These operations perform a matrix multiplication and accumulation of
20058 // the form:
20059 // D = A * B + C
20060 // We need to specify one type for matrices AB and one for matrices CD.
20061 // Sparse matrix operations can have different types for A and B as well as
20062 // an additional type for sparsity index.
20063 // Destination type should be put before types used for source operands.
20064 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
20065 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20066 // There is no need for the variable opsel argument, so always set it to
20067 // "false".
20068 bool AppendFalseForOpselArg = false;
20069 unsigned BuiltinWMMAOp;
20070
20071 switch (BuiltinID) {
20072 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20073 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20074 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20075 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20076 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20077 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20078 break;
20079 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20080 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20081 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20082 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20083 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20084 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20085 break;
20086 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20087 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20088 AppendFalseForOpselArg = true;
20089 [[fallthrough]];
20090 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20091 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20092 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20093 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20094 break;
20095 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20096 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20097 AppendFalseForOpselArg = true;
20098 [[fallthrough]];
20099 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20100 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20101 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20102 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20103 break;
20104 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20105 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20106 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20107 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20108 break;
20109 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20110 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20111 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20112 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20113 break;
20114 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20115 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20116 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20117 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20118 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20119 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20120 break;
20121 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20122 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20123 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20124 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20125 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20126 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20127 break;
20128 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20129 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20130 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20131 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20132 break;
20133 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20134 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20135 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20136 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20137 break;
20138 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20139 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20140 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20141 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20142 break;
20143 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20144 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20145 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20146 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20147 break;
20148 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20149 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20150 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20151 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20152 break;
20153 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20154 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20155 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20156 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20157 break;
20158 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20159 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20160 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20161 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20162 break;
20163 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20164 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20165 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20166 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20167 break;
20168 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20169 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20170 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20171 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20172 break;
20173 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20174 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20175 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20176 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20177 break;
20178 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20179 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20180 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20181 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20182 break;
20183 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20184 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20185 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20186 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20187 break;
20188 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20189 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20190 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20191 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20192 break;
20193 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20194 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20195 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20196 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20197 break;
20198 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20199 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20200 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20201 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20202 break;
20203 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20204 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20205 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20206 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20207 break;
20208 }
20209
20210    SmallVector<Value *, 6> Args;
20211    for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20212 Args.push_back(EmitScalarExpr(E->getArg(i)));
20213 if (AppendFalseForOpselArg)
20214 Args.push_back(Builder.getFalse());
20215
20216    SmallVector<llvm::Type *, 6> ArgTypes;
20217    for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20218 ArgTypes.push_back(Args[ArgIdx]->getType());
20219
20220 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20221 return Builder.CreateCall(F, Args);
20222 }
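  // Worked example of ArgsForMatchingMatrixTypes above: for
  // __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 the overload types come from
  // argument 2 (the CD accumulator vector) and argument 0 (the AB input
  // vector), so the wmma intrinsic is mangled only on those two vector types.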
20223
20224 // amdgcn workitem
20225 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20226 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20227 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20228 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20229 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20230 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20231
20232 // amdgcn workgroup size
20233 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20234 return EmitAMDGPUWorkGroupSize(*this, 0);
20235 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20236 return EmitAMDGPUWorkGroupSize(*this, 1);
20237 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20238 return EmitAMDGPUWorkGroupSize(*this, 2);
20239
20240 // amdgcn grid size
20241 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20242 return EmitAMDGPUGridSize(*this, 0);
20243 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20244 return EmitAMDGPUGridSize(*this, 1);
20245 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20246 return EmitAMDGPUGridSize(*this, 2);
20247
20248 // r600 intrinsics
20249 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20250 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20251 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20252 Intrinsic::r600_recipsqrt_ieee);
20253 case AMDGPU::BI__builtin_r600_read_tidig_x:
20254 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20255 case AMDGPU::BI__builtin_r600_read_tidig_y:
20256 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20257 case AMDGPU::BI__builtin_r600_read_tidig_z:
20258 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20259 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20260 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20261 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20262 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20263 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20264 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20265 }
20266  case AMDGPU::BI__builtin_amdgcn_fence: {
20267    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20268                            EmitScalarExpr(E->getArg(1)), AO, SSID);
20269    FenceInst *Fence = Builder.CreateFence(AO, SSID);
20270    if (E->getNumArgs() > 2)
20271      AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20272    return Fence;
20273 }
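  // For instance, __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup") emits
  // 'fence syncscope("workgroup") seq_cst'; any additional address-space
  // string arguments only add the !mmra tags handled above.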
20274 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20275 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20276 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20277 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20278 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20279 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20280 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20281 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20282 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20283 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20284 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20285 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20286 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20287 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20288 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20289 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20290 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20291 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20292 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20293 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20294 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20295 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20296 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20297 llvm::AtomicRMWInst::BinOp BinOp;
20298 switch (BuiltinID) {
20299 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20300 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20301 BinOp = llvm::AtomicRMWInst::UIncWrap;
20302 break;
20303 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20304 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20305 BinOp = llvm::AtomicRMWInst::UDecWrap;
20306 break;
20307 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20308 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20309 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20310 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20311 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20312 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20313 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20314 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20315 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20316 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20317 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20318 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20319 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20320 BinOp = llvm::AtomicRMWInst::FAdd;
20321 break;
20322 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20323 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20324 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20325 BinOp = llvm::AtomicRMWInst::FMin;
20326 break;
20327 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20328 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20329 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20330 BinOp = llvm::AtomicRMWInst::FMax;
20331 break;
20332 }
20333
20334 Address Ptr = CheckAtomicAlignment(*this, E);
20335 Value *Val = EmitScalarExpr(E->getArg(1));
20336 llvm::Type *OrigTy = Val->getType();
20337 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20338
20339 bool Volatile;
20340
20341 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20342 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20343 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20344 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20345 Volatile =
20346 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20347 } else {
20348 // Infer volatile from the passed type.
20349      Volatile =
20350          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
20351    }
20352
20353 if (E->getNumArgs() >= 4) {
20354 // Some of the builtins have explicit ordering and scope arguments.
20355      ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20356                              EmitScalarExpr(E->getArg(3)), AO, SSID);
20357 } else {
20358 // Most of the builtins do not have syncscope/order arguments. For DS
20359 // atomics the scope doesn't really matter, as they implicitly operate at
20360 // workgroup scope.
20361 //
20362 // The global/flat cases need to use agent scope to consistently produce
20363 // the native instruction instead of a cmpxchg expansion.
20364 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20365 AO = AtomicOrdering::Monotonic;
20366
20367 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20368 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20369 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20370 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20371 llvm::Type *V2BF16Ty = FixedVectorType::get(
20372 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20373 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20374 }
20375 }
20376
20377 llvm::AtomicRMWInst *RMW =
20378 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20379 if (Volatile)
20380 RMW->setVolatile(true);
20381
20382 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20383 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20384 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20385 // instruction for flat and global operations.
20386 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20387 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20388
20389 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20390 // instruction, but this only matters for float fadd.
20391 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20392 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20393 }
20394
20395 return Builder.CreateBitCast(RMW, OrigTy);
20396 }
20397 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20398 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20399 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20400 llvm::Type *ResultType = ConvertType(E->getType());
20401 // s_sendmsg_rtn is mangled using return type only.
20402 Function *F =
20403 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20404 return Builder.CreateCall(F, {Arg});
20405 }
20406 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20407 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20408 // Because builtin types are limited, and the intrinsic uses a struct/pair
20409 // output, marshal the pair-of-i32 to <2 x i32>.
20410 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20411 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20412 Value *FI = EmitScalarExpr(E->getArg(2));
20413 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20414 Function *F =
20415 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20416 ? Intrinsic::amdgcn_permlane16_swap
20417 : Intrinsic::amdgcn_permlane32_swap);
20418 llvm::CallInst *Call =
20419 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20420
20421 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20422 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20423
20424 llvm::Type *ResultType = ConvertType(E->getType());
20425
20426 llvm::Value *Insert0 = Builder.CreateInsertElement(
20427 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20428 llvm::Value *AsVector =
20429 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20430 return AsVector;
20431 }
20432 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20433 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20434 return emitBuiltinWithOneOverloadedType<4>(*this, E,
20435 Intrinsic::amdgcn_bitop3);
20436 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20437 return emitBuiltinWithOneOverloadedType<4>(
20438 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20439 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20440 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20441 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20442 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20443 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20444 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20445 return emitBuiltinWithOneOverloadedType<5>(
20446 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20447 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20448 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20449 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20450 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20451 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20452 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20453 llvm::Type *RetTy = nullptr;
20454 switch (BuiltinID) {
20455 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20456 RetTy = Int8Ty;
20457 break;
20458 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20459 RetTy = Int16Ty;
20460 break;
20461 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20462 RetTy = Int32Ty;
20463 break;
20464 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20465 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20466 break;
20467 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20468 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20469 break;
20470 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20471 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20472 break;
20473 }
20474 Function *F =
20475 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20476 return Builder.CreateCall(
20477 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20478 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20479 }
20480 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20481 return emitBuiltinWithOneOverloadedType<2>(
20482 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20483 default:
20484 return nullptr;
20485 }
20486}
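// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream file): a minimal, hedged
// reconstruction of the atomicrmw pattern the fadd/fmin/fmax case above emits
// for global/flat pointers -- "agent" syncscope, monotonic ordering, and the
// "amdgpu.no.fine.grained.memory" metadata (plus "amdgpu.ignore.denormal.mode"
// for f32 fadd). The helper name and its parameters are assumptions made for
// illustration; it uses the plain llvm::IRBuilder rather than CGBuilderTy.
static llvm::AtomicRMWInst *sketchAgentScopedFAdd(llvm::IRBuilder<> &B,
                                                  llvm::Value *Ptr,
                                                  llvm::Value *Val) {
  llvm::LLVMContext &Ctx = B.getContext();
  llvm::SyncScope::ID Agent = Ctx.getOrInsertSyncScopeID("agent");
  // Monotonic, agent-scoped atomicrmw fadd, mirroring the case above.
  llvm::AtomicRMWInst *RMW = B.CreateAtomicRMW(
      llvm::AtomicRMWInst::FAdd, Ptr, Val, llvm::MaybeAlign(),
      llvm::AtomicOrdering::Monotonic, Agent);
  llvm::MDNode *Empty = llvm::MDNode::get(Ctx, {});
  RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty);
  // Only float fadd additionally needs the denormal-mode metadata.
  if (Val->getType()->isFloatTy())
    RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty);
  return RMW;
}
// ---------------------------------------------------------------------------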
20487
20488Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
20489                                             const CallExpr *E) {
20490 switch (BuiltinID) {
20491 case SPIRV::BI__builtin_spirv_distance: {
20492 Value *X = EmitScalarExpr(E->getArg(0));
20493 Value *Y = EmitScalarExpr(E->getArg(1));
20494 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20495 E->getArg(1)->getType()->hasFloatingRepresentation() &&
20496 "Distance operands must have a float representation");
20497 assert(E->getArg(0)->getType()->isVectorType() &&
20498 E->getArg(1)->getType()->isVectorType() &&
20499 "Distance operands must be a vector");
20500 return Builder.CreateIntrinsic(
20501 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
20502 ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
20503 }
20504 }
20505 return nullptr;
20506}
20507
20508/// Handle a SystemZ function in which the final argument is a pointer
20509/// to an int that receives the post-instruction CC value. At the LLVM level
20510/// this is represented as a function that returns a {result, cc} pair.
20511static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20512                                         unsigned IntrinsicID,
20513 const CallExpr *E) {
20514 unsigned NumArgs = E->getNumArgs() - 1;
20515 SmallVector<Value *, 8> Args(NumArgs);
20516 for (unsigned I = 0; I < NumArgs; ++I)
20517 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20518 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20519 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20520 Value *Call = CGF.Builder.CreateCall(F, Args);
20521 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20522 CGF.Builder.CreateStore(CC, CCPtr);
20523 return CGF.Builder.CreateExtractValue(Call, 0);
20524}
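// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream file): from the caller's side,
// each builtin routed through EmitSystemZIntrinsicWithCC takes its normal
// operands plus a trailing pointer that receives the condition code, while the
// underlying intrinsic returns a {result, cc} pair that is split exactly as
// above. Hedged IR shape for e.g. __builtin_s390_vceqbs(A, B, &CC); value
// names are invented for the example:
//
//   %pair = call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %A, <16 x i8> %B)
//   %cc   = extractvalue { <16 x i8>, i32 } %pair, 1   ; stored through the CC pointer
//   %r    = extractvalue { <16 x i8>, i32 } %pair, 0   ; the builtin's return value
// ---------------------------------------------------------------------------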
20525
20526Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20527                                               const CallExpr *E) {
20528 switch (BuiltinID) {
20529 case SystemZ::BI__builtin_tbegin: {
20530 Value *TDB = EmitScalarExpr(E->getArg(0));
20531 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20532 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20533 return Builder.CreateCall(F, {TDB, Control});
20534 }
20535 case SystemZ::BI__builtin_tbegin_nofloat: {
20536 Value *TDB = EmitScalarExpr(E->getArg(0));
20537 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20538 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20539 return Builder.CreateCall(F, {TDB, Control});
20540 }
20541 case SystemZ::BI__builtin_tbeginc: {
20542 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20543 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20544 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20545 return Builder.CreateCall(F, {TDB, Control});
20546 }
20547 case SystemZ::BI__builtin_tabort: {
20548 Value *Data = EmitScalarExpr(E->getArg(0));
20549 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20550 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20551 }
20552 case SystemZ::BI__builtin_non_tx_store: {
20553 Value *Address = EmitScalarExpr(E->getArg(0));
20554 Value *Data = EmitScalarExpr(E->getArg(1));
20555 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20556 return Builder.CreateCall(F, {Data, Address});
20557 }
20558
20559 // Vector builtins. Note that most vector builtins are mapped automatically
20560 // to target-specific LLVM intrinsics. The ones handled specially here can
20561 // be represented via standard LLVM IR, which is preferable to enable common
20562 // LLVM optimizations.
20563
20564 case SystemZ::BI__builtin_s390_vclzb:
20565 case SystemZ::BI__builtin_s390_vclzh:
20566 case SystemZ::BI__builtin_s390_vclzf:
20567 case SystemZ::BI__builtin_s390_vclzg: {
20568 llvm::Type *ResultType = ConvertType(E->getType());
20569 Value *X = EmitScalarExpr(E->getArg(0));
20570 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20571 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20572 return Builder.CreateCall(F, {X, Undef});
20573 }
20574
20575 case SystemZ::BI__builtin_s390_vctzb:
20576 case SystemZ::BI__builtin_s390_vctzh:
20577 case SystemZ::BI__builtin_s390_vctzf:
20578 case SystemZ::BI__builtin_s390_vctzg: {
20579 llvm::Type *ResultType = ConvertType(E->getType());
20580 Value *X = EmitScalarExpr(E->getArg(0));
20581 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20582 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20583 return Builder.CreateCall(F, {X, Undef});
20584 }
20585
20586 case SystemZ::BI__builtin_s390_verllb:
20587 case SystemZ::BI__builtin_s390_verllh:
20588 case SystemZ::BI__builtin_s390_verllf:
20589 case SystemZ::BI__builtin_s390_verllg: {
20590 llvm::Type *ResultType = ConvertType(E->getType());
20591 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20592 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20593 // Splat scalar rotate amount to vector type.
20594 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20595 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20596 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20597 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20598 return Builder.CreateCall(F, { Src, Src, Amt });
20599 }
20600
20601 case SystemZ::BI__builtin_s390_verllvb:
20602 case SystemZ::BI__builtin_s390_verllvh:
20603 case SystemZ::BI__builtin_s390_verllvf:
20604 case SystemZ::BI__builtin_s390_verllvg: {
20605 llvm::Type *ResultType = ConvertType(E->getType());
20606 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20607 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20608 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20609 return Builder.CreateCall(F, { Src, Src, Amt });
20610 }
20611
20612 case SystemZ::BI__builtin_s390_vfsqsb:
20613 case SystemZ::BI__builtin_s390_vfsqdb: {
20614 llvm::Type *ResultType = ConvertType(E->getType());
20615 Value *X = EmitScalarExpr(E->getArg(0));
20616 if (Builder.getIsFPConstrained()) {
20617 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20618 return Builder.CreateConstrainedFPCall(F, { X });
20619 } else {
20620 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20621 return Builder.CreateCall(F, X);
20622 }
20623 }
20624 case SystemZ::BI__builtin_s390_vfmasb:
20625 case SystemZ::BI__builtin_s390_vfmadb: {
20626 llvm::Type *ResultType = ConvertType(E->getType());
20627 Value *X = EmitScalarExpr(E->getArg(0));
20628 Value *Y = EmitScalarExpr(E->getArg(1));
20629 Value *Z = EmitScalarExpr(E->getArg(2));
20630 if (Builder.getIsFPConstrained()) {
20631 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20632 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20633 } else {
20634 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20635 return Builder.CreateCall(F, {X, Y, Z});
20636 }
20637 }
20638 case SystemZ::BI__builtin_s390_vfmssb:
20639 case SystemZ::BI__builtin_s390_vfmsdb: {
20640 llvm::Type *ResultType = ConvertType(E->getType());
20641 Value *X = EmitScalarExpr(E->getArg(0));
20642 Value *Y = EmitScalarExpr(E->getArg(1));
20643 Value *Z = EmitScalarExpr(E->getArg(2));
20644 if (Builder.getIsFPConstrained()) {
20645 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20646 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20647 } else {
20648 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20649 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20650 }
20651 }
20652 case SystemZ::BI__builtin_s390_vfnmasb:
20653 case SystemZ::BI__builtin_s390_vfnmadb: {
20654 llvm::Type *ResultType = ConvertType(E->getType());
20655 Value *X = EmitScalarExpr(E->getArg(0));
20656 Value *Y = EmitScalarExpr(E->getArg(1));
20657 Value *Z = EmitScalarExpr(E->getArg(2));
20658 if (Builder.getIsFPConstrained()) {
20659 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20660 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20661 } else {
20662 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20663 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20664 }
20665 }
20666 case SystemZ::BI__builtin_s390_vfnmssb:
20667 case SystemZ::BI__builtin_s390_vfnmsdb: {
20668 llvm::Type *ResultType = ConvertType(E->getType());
20669 Value *X = EmitScalarExpr(E->getArg(0));
20670 Value *Y = EmitScalarExpr(E->getArg(1));
20671 Value *Z = EmitScalarExpr(E->getArg(2));
20672 if (Builder.getIsFPConstrained()) {
20673 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20674 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20675 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20676 } else {
20677 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20678 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20679 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20680 }
20681 }
20682 case SystemZ::BI__builtin_s390_vflpsb:
20683 case SystemZ::BI__builtin_s390_vflpdb: {
20684 llvm::Type *ResultType = ConvertType(E->getType());
20685 Value *X = EmitScalarExpr(E->getArg(0));
20686 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20687 return Builder.CreateCall(F, X);
20688 }
20689 case SystemZ::BI__builtin_s390_vflnsb:
20690 case SystemZ::BI__builtin_s390_vflndb: {
20691 llvm::Type *ResultType = ConvertType(E->getType());
20692 Value *X = EmitScalarExpr(E->getArg(0));
20693 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20694 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20695 }
20696 case SystemZ::BI__builtin_s390_vfisb:
20697 case SystemZ::BI__builtin_s390_vfidb: {
20698 llvm::Type *ResultType = ConvertType(E->getType());
20699 Value *X = EmitScalarExpr(E->getArg(0));
20700 // Constant-fold the M4 and M5 mask arguments.
20701 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20702 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20703 // Check whether this instance can be represented via a LLVM standard
20704 // intrinsic. We only support some combinations of M4 and M5.
20705 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20706 Intrinsic::ID CI;
20707 switch (M4.getZExtValue()) {
20708 default: break;
20709 case 0: // IEEE-inexact exception allowed
20710 switch (M5.getZExtValue()) {
20711 default: break;
20712 case 0: ID = Intrinsic::rint;
20713 CI = Intrinsic::experimental_constrained_rint; break;
20714 }
20715 break;
20716 case 4: // IEEE-inexact exception suppressed
20717 switch (M5.getZExtValue()) {
20718 default: break;
20719 case 0: ID = Intrinsic::nearbyint;
20720 CI = Intrinsic::experimental_constrained_nearbyint; break;
20721 case 1: ID = Intrinsic::round;
20722 CI = Intrinsic::experimental_constrained_round; break;
20723 case 5: ID = Intrinsic::trunc;
20724 CI = Intrinsic::experimental_constrained_trunc; break;
20725 case 6: ID = Intrinsic::ceil;
20726 CI = Intrinsic::experimental_constrained_ceil; break;
20727 case 7: ID = Intrinsic::floor;
20728 CI = Intrinsic::experimental_constrained_floor; break;
20729 }
20730 break;
20731 }
20732 if (ID != Intrinsic::not_intrinsic) {
20733 if (Builder.getIsFPConstrained()) {
20734 Function *F = CGM.getIntrinsic(CI, ResultType);
20735 return Builder.CreateConstrainedFPCall(F, X);
20736 } else {
20737 Function *F = CGM.getIntrinsic(ID, ResultType);
20738 return Builder.CreateCall(F, X);
20739 }
20740 }
20741 switch (BuiltinID) { // FIXME: constrained version?
20742 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20743 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20744 default: llvm_unreachable("Unknown BuiltinID");
20745 }
20746 Function *F = CGM.getIntrinsic(ID);
20747 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20748 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20749 return Builder.CreateCall(F, {X, M4Value, M5Value});
20750 }
20751 case SystemZ::BI__builtin_s390_vfmaxsb:
20752 case SystemZ::BI__builtin_s390_vfmaxdb: {
20753 llvm::Type *ResultType = ConvertType(E->getType());
20754 Value *X = EmitScalarExpr(E->getArg(0));
20755 Value *Y = EmitScalarExpr(E->getArg(1));
20756 // Constant-fold the M4 mask argument.
20757 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20758 // Check whether this instance can be represented via a LLVM standard
20759 // intrinsic. We only support some values of M4.
20760 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20761 Intrinsic::ID CI;
20762 switch (M4.getZExtValue()) {
20763 default: break;
20764 case 4: ID = Intrinsic::maxnum;
20765 CI = Intrinsic::experimental_constrained_maxnum; break;
20766 }
20767 if (ID != Intrinsic::not_intrinsic) {
20768 if (Builder.getIsFPConstrained()) {
20769 Function *F = CGM.getIntrinsic(CI, ResultType);
20770 return Builder.CreateConstrainedFPCall(F, {X, Y});
20771 } else {
20772 Function *F = CGM.getIntrinsic(ID, ResultType);
20773 return Builder.CreateCall(F, {X, Y});
20774 }
20775 }
20776 switch (BuiltinID) {
20777 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20778 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20779 default: llvm_unreachable("Unknown BuiltinID");
20780 }
20781 Function *F = CGM.getIntrinsic(ID);
20782 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20783 return Builder.CreateCall(F, {X, Y, M4Value});
20784 }
20785 case SystemZ::BI__builtin_s390_vfminsb:
20786 case SystemZ::BI__builtin_s390_vfmindb: {
20787 llvm::Type *ResultType = ConvertType(E->getType());
20788 Value *X = EmitScalarExpr(E->getArg(0));
20789 Value *Y = EmitScalarExpr(E->getArg(1));
20790 // Constant-fold the M4 mask argument.
20791 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20792 // Check whether this instance can be represented via a LLVM standard
20793 // intrinsic. We only support some values of M4.
20794 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20795 Intrinsic::ID CI;
20796 switch (M4.getZExtValue()) {
20797 default: break;
20798 case 4: ID = Intrinsic::minnum;
20799 CI = Intrinsic::experimental_constrained_minnum; break;
20800 }
20801 if (ID != Intrinsic::not_intrinsic) {
20802 if (Builder.getIsFPConstrained()) {
20803 Function *F = CGM.getIntrinsic(CI, ResultType);
20804 return Builder.CreateConstrainedFPCall(F, {X, Y});
20805 } else {
20806 Function *F = CGM.getIntrinsic(ID, ResultType);
20807 return Builder.CreateCall(F, {X, Y});
20808 }
20809 }
20810 switch (BuiltinID) {
20811 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20812 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20813 default: llvm_unreachable("Unknown BuiltinID");
20814 }
20815 Function *F = CGM.getIntrinsic(ID);
20816 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20817 return Builder.CreateCall(F, {X, Y, M4Value});
20818 }
20819
20820 case SystemZ::BI__builtin_s390_vlbrh:
20821 case SystemZ::BI__builtin_s390_vlbrf:
20822 case SystemZ::BI__builtin_s390_vlbrg: {
20823 llvm::Type *ResultType = ConvertType(E->getType());
20824 Value *X = EmitScalarExpr(E->getArg(0));
20825 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20826 return Builder.CreateCall(F, X);
20827 }
20828
20829 // Vector intrinsics that output the post-instruction CC value.
20830
20831#define INTRINSIC_WITH_CC(NAME) \
20832 case SystemZ::BI__builtin_##NAME: \
20833 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20834
20835 INTRINSIC_WITH_CC(s390_vpkshs);
20836 INTRINSIC_WITH_CC(s390_vpksfs);
20837 INTRINSIC_WITH_CC(s390_vpksgs);
20838
20839 INTRINSIC_WITH_CC(s390_vpklshs);
20840 INTRINSIC_WITH_CC(s390_vpklsfs);
20841 INTRINSIC_WITH_CC(s390_vpklsgs);
20842
20843 INTRINSIC_WITH_CC(s390_vceqbs);
20844 INTRINSIC_WITH_CC(s390_vceqhs);
20845 INTRINSIC_WITH_CC(s390_vceqfs);
20846 INTRINSIC_WITH_CC(s390_vceqgs);
20847
20848 INTRINSIC_WITH_CC(s390_vchbs);
20849 INTRINSIC_WITH_CC(s390_vchhs);
20850 INTRINSIC_WITH_CC(s390_vchfs);
20851 INTRINSIC_WITH_CC(s390_vchgs);
20852
20853 INTRINSIC_WITH_CC(s390_vchlbs);
20854 INTRINSIC_WITH_CC(s390_vchlhs);
20855 INTRINSIC_WITH_CC(s390_vchlfs);
20856 INTRINSIC_WITH_CC(s390_vchlgs);
20857
20858 INTRINSIC_WITH_CC(s390_vfaebs);
20859 INTRINSIC_WITH_CC(s390_vfaehs);
20860 INTRINSIC_WITH_CC(s390_vfaefs);
20861
20862 INTRINSIC_WITH_CC(s390_vfaezbs);
20863 INTRINSIC_WITH_CC(s390_vfaezhs);
20864 INTRINSIC_WITH_CC(s390_vfaezfs);
20865
20866 INTRINSIC_WITH_CC(s390_vfeebs);
20867 INTRINSIC_WITH_CC(s390_vfeehs);
20868 INTRINSIC_WITH_CC(s390_vfeefs);
20869
20870 INTRINSIC_WITH_CC(s390_vfeezbs);
20871 INTRINSIC_WITH_CC(s390_vfeezhs);
20872 INTRINSIC_WITH_CC(s390_vfeezfs);
20873
20874 INTRINSIC_WITH_CC(s390_vfenebs);
20875 INTRINSIC_WITH_CC(s390_vfenehs);
20876 INTRINSIC_WITH_CC(s390_vfenefs);
20877
20878 INTRINSIC_WITH_CC(s390_vfenezbs);
20879 INTRINSIC_WITH_CC(s390_vfenezhs);
20880 INTRINSIC_WITH_CC(s390_vfenezfs);
20881
20882 INTRINSIC_WITH_CC(s390_vistrbs);
20883 INTRINSIC_WITH_CC(s390_vistrhs);
20884 INTRINSIC_WITH_CC(s390_vistrfs);
20885
20886 INTRINSIC_WITH_CC(s390_vstrcbs);
20887 INTRINSIC_WITH_CC(s390_vstrchs);
20888 INTRINSIC_WITH_CC(s390_vstrcfs);
20889
20890 INTRINSIC_WITH_CC(s390_vstrczbs);
20891 INTRINSIC_WITH_CC(s390_vstrczhs);
20892 INTRINSIC_WITH_CC(s390_vstrczfs);
20893
20894 INTRINSIC_WITH_CC(s390_vfcesbs);
20895 INTRINSIC_WITH_CC(s390_vfcedbs);
20896 INTRINSIC_WITH_CC(s390_vfchsbs);
20897 INTRINSIC_WITH_CC(s390_vfchdbs);
20898 INTRINSIC_WITH_CC(s390_vfchesbs);
20899 INTRINSIC_WITH_CC(s390_vfchedbs);
20900
20901 INTRINSIC_WITH_CC(s390_vftcisb);
20902 INTRINSIC_WITH_CC(s390_vftcidb);
20903
20904 INTRINSIC_WITH_CC(s390_vstrsb);
20905 INTRINSIC_WITH_CC(s390_vstrsh);
20906 INTRINSIC_WITH_CC(s390_vstrsf);
20907
20908 INTRINSIC_WITH_CC(s390_vstrszb);
20909 INTRINSIC_WITH_CC(s390_vstrszh);
20910 INTRINSIC_WITH_CC(s390_vstrszf);
20911
20912#undef INTRINSIC_WITH_CC
20913
20914 default:
20915 return nullptr;
20916 }
20917}
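// ---------------------------------------------------------------------------
// Summary (derived from the vfisb/vfidb case above; not part of the upstream
// file): the (M4, M5) mask pair maps onto standard LLVM rounding intrinsics as
// follows, with the experimental.constrained.* counterpart used under strict
// FP. Every other combination falls back to the target-specific s390.vfisb /
// s390.vfidb intrinsic with the raw mask values.
//
//   M4 = 0 (inexact allowed),    M5 = 0  ->  llvm.rint
//   M4 = 4 (inexact suppressed), M5 = 0  ->  llvm.nearbyint
//   M4 = 4,                      M5 = 1  ->  llvm.round
//   M4 = 4,                      M5 = 5  ->  llvm.trunc
//   M4 = 4,                      M5 = 6  ->  llvm.ceil
//   M4 = 4,                      M5 = 7  ->  llvm.floor
// ---------------------------------------------------------------------------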
20918
20919namespace {
20920// Helper classes for mapping MMA builtins to particular LLVM intrinsic variants.
20921struct NVPTXMmaLdstInfo {
20922 unsigned NumResults; // Number of elements to load/store
20923 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
20924 unsigned IID_col;
20925 unsigned IID_row;
20926};
20927
20928#define MMA_INTR(geom_op_type, layout) \
20929 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
20930#define MMA_LDST(n, geom_op_type) \
20931 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
20932
20933static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
20934 switch (BuiltinID) {
20935 // FP MMA loads
20936 case NVPTX::BI__hmma_m16n16k16_ld_a:
20937 return MMA_LDST(8, m16n16k16_load_a_f16);
20938 case NVPTX::BI__hmma_m16n16k16_ld_b:
20939 return MMA_LDST(8, m16n16k16_load_b_f16);
20940 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20941 return MMA_LDST(4, m16n16k16_load_c_f16);
20942 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20943 return MMA_LDST(8, m16n16k16_load_c_f32);
20944 case NVPTX::BI__hmma_m32n8k16_ld_a:
20945 return MMA_LDST(8, m32n8k16_load_a_f16);
20946 case NVPTX::BI__hmma_m32n8k16_ld_b:
20947 return MMA_LDST(8, m32n8k16_load_b_f16);
20948 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20949 return MMA_LDST(4, m32n8k16_load_c_f16);
20950 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20951 return MMA_LDST(8, m32n8k16_load_c_f32);
20952 case NVPTX::BI__hmma_m8n32k16_ld_a:
20953 return MMA_LDST(8, m8n32k16_load_a_f16);
20954 case NVPTX::BI__hmma_m8n32k16_ld_b:
20955 return MMA_LDST(8, m8n32k16_load_b_f16);
20956 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20957 return MMA_LDST(4, m8n32k16_load_c_f16);
20958 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20959 return MMA_LDST(8, m8n32k16_load_c_f32);
20960
20961 // Integer MMA loads
20962 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20963 return MMA_LDST(2, m16n16k16_load_a_s8);
20964 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20965 return MMA_LDST(2, m16n16k16_load_a_u8);
20966 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20967 return MMA_LDST(2, m16n16k16_load_b_s8);
20968 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20969 return MMA_LDST(2, m16n16k16_load_b_u8);
20970 case NVPTX::BI__imma_m16n16k16_ld_c:
20971 return MMA_LDST(8, m16n16k16_load_c_s32);
20972 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20973 return MMA_LDST(4, m32n8k16_load_a_s8);
20974 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20975 return MMA_LDST(4, m32n8k16_load_a_u8);
20976 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20977 return MMA_LDST(1, m32n8k16_load_b_s8);
20978 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20979 return MMA_LDST(1, m32n8k16_load_b_u8);
20980 case NVPTX::BI__imma_m32n8k16_ld_c:
20981 return MMA_LDST(8, m32n8k16_load_c_s32);
20982 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20983 return MMA_LDST(1, m8n32k16_load_a_s8);
20984 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20985 return MMA_LDST(1, m8n32k16_load_a_u8);
20986 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20987 return MMA_LDST(4, m8n32k16_load_b_s8);
20988 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20989 return MMA_LDST(4, m8n32k16_load_b_u8);
20990 case NVPTX::BI__imma_m8n32k16_ld_c:
20991 return MMA_LDST(8, m8n32k16_load_c_s32);
20992
20993 // Sub-integer MMA loads.
20994 // Only row/col layout is supported by A/B fragments.
20995 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20996 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
20997 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20998 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
20999 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21000 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
21001 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21002 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
21003 case NVPTX::BI__imma_m8n8k32_ld_c:
21004 return MMA_LDST(2, m8n8k32_load_c_s32);
21005 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21006 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
21007 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21008 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
21009 case NVPTX::BI__bmma_m8n8k128_ld_c:
21010 return MMA_LDST(2, m8n8k128_load_c_s32);
21011
21012 // Double MMA loads
21013 case NVPTX::BI__dmma_m8n8k4_ld_a:
21014 return MMA_LDST(1, m8n8k4_load_a_f64);
21015 case NVPTX::BI__dmma_m8n8k4_ld_b:
21016 return MMA_LDST(1, m8n8k4_load_b_f64);
21017 case NVPTX::BI__dmma_m8n8k4_ld_c:
21018 return MMA_LDST(2, m8n8k4_load_c_f64);
21019
21020 // Alternate float MMA loads
21021 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21022 return MMA_LDST(4, m16n16k16_load_a_bf16);
21023 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21024 return MMA_LDST(4, m16n16k16_load_b_bf16);
21025 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21026 return MMA_LDST(2, m8n32k16_load_a_bf16);
21027 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21028 return MMA_LDST(8, m8n32k16_load_b_bf16);
21029 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21030 return MMA_LDST(8, m32n8k16_load_a_bf16);
21031 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21032 return MMA_LDST(2, m32n8k16_load_b_bf16);
21033 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21034 return MMA_LDST(4, m16n16k8_load_a_tf32);
21035 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21036 return MMA_LDST(4, m16n16k8_load_b_tf32);
21037 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
21038 return MMA_LDST(8, m16n16k8_load_c_f32);
21039
21040  // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
21041 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
21042 // use fragment C for both loads and stores.
21043 // FP MMA stores.
21044 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21045 return MMA_LDST(4, m16n16k16_store_d_f16);
21046 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21047 return MMA_LDST(8, m16n16k16_store_d_f32);
21048 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21049 return MMA_LDST(4, m32n8k16_store_d_f16);
21050 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21051 return MMA_LDST(8, m32n8k16_store_d_f32);
21052 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21053 return MMA_LDST(4, m8n32k16_store_d_f16);
21054 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21055 return MMA_LDST(8, m8n32k16_store_d_f32);
21056
21057 // Integer and sub-integer MMA stores.
21058 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
21059 // name, integer loads/stores use LLVM's i32.
21060 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21061 return MMA_LDST(8, m16n16k16_store_d_s32);
21062 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21063 return MMA_LDST(8, m32n8k16_store_d_s32);
21064 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21065 return MMA_LDST(8, m8n32k16_store_d_s32);
21066 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21067 return MMA_LDST(2, m8n8k32_store_d_s32);
21068 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21069 return MMA_LDST(2, m8n8k128_store_d_s32);
21070
21071 // Double MMA store
21072 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21073 return MMA_LDST(2, m8n8k4_store_d_f64);
21074
21075 // Alternate float MMA store
21076 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21077 return MMA_LDST(8, m16n16k8_store_d_f32);
21078
21079 default:
21080 llvm_unreachable("Unknown MMA builtin");
21081 }
21082}
21083#undef MMA_LDST
21084#undef MMA_INTR
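// ---------------------------------------------------------------------------
// Illustrative example (not part of the upstream file): how the table above
// expands for one builtin. With MMA_LDST(8, m16n16k16_load_a_f16), the entry
// for NVPTX::BI__hmma_m16n16k16_ld_a is
//   { NumResults = 8,
//     IID_col = Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//     IID_row = Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// whereas the sub-integer A/B fragments leave the unsupported layout's ID as
// 0, which the MMA load/store cases in EmitNVPTXBuiltinExpr below treat as
// "no intrinsic for this layout".
// ---------------------------------------------------------------------------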
21085
21086
21087struct NVPTXMmaInfo {
21088 unsigned NumEltsA;
21089 unsigned NumEltsB;
21090 unsigned NumEltsC;
21091 unsigned NumEltsD;
21092
21093 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
21094 // over 'col' for layout. The index of non-satf variants is expected to match
21095 // the undocumented layout constants used by CUDA's mma.hpp.
21096 std::array<unsigned, 8> Variants;
21097
21098 unsigned getMMAIntrinsic(int Layout, bool Satf) {
21099 unsigned Index = Layout + 4 * Satf;
21100 if (Index >= Variants.size())
21101 return 0;
21102 return Variants[Index];
21103 }
21104};
21105
21106 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21107 // Layout and Satf, 0 otherwise.
21108static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21109 // clang-format off
21110#define MMA_VARIANTS(geom, type) \
21111 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21112 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21113 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21114 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21115#define MMA_SATF_VARIANTS(geom, type) \
21116 MMA_VARIANTS(geom, type), \
21117 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21118 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21119 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21120 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21121// Sub-integer MMA only supports row.col layout.
21122#define MMA_VARIANTS_I4(geom, type) \
21123 0, \
21124 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21125 0, \
21126 0, \
21127 0, \
21128 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21129 0, \
21130 0
21131// b1 MMA does not support .satfinite.
21132#define MMA_VARIANTS_B1_XOR(geom, type) \
21133 0, \
21134 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21135 0, \
21136 0, \
21137 0, \
21138 0, \
21139 0, \
21140 0
21141#define MMA_VARIANTS_B1_AND(geom, type) \
21142 0, \
21143 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21144 0, \
21145 0, \
21146 0, \
21147 0, \
21148 0, \
21149 0
21150 // clang-format on
21151 switch (BuiltinID) {
21152 // FP MMA
21153   // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21154   // the NumEltsN fields of the return value are ordered as A,B,C,D.
21155 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21156 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21157 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21158 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21159 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21160 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21161 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21162 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21163 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21164 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21165 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21166 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21167 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21168 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21169 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21170 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21171 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21172 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21173 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21174 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21175 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21176 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21177 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21178 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21179
21180 // Integer MMA
21181 case NVPTX::BI__imma_m16n16k16_mma_s8:
21182 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21183 case NVPTX::BI__imma_m16n16k16_mma_u8:
21184 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21185 case NVPTX::BI__imma_m32n8k16_mma_s8:
21186 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21187 case NVPTX::BI__imma_m32n8k16_mma_u8:
21188 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21189 case NVPTX::BI__imma_m8n32k16_mma_s8:
21190 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21191 case NVPTX::BI__imma_m8n32k16_mma_u8:
21192 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21193
21194 // Sub-integer MMA
21195 case NVPTX::BI__imma_m8n8k32_mma_s4:
21196 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21197 case NVPTX::BI__imma_m8n8k32_mma_u4:
21198 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21199 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21200 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21201 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21202 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21203
21204 // Double MMA
21205 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21206 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21207
21208 // Alternate FP MMA
21209 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21210 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21211 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21212 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21213 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21214 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21215 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21216 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21217 default:
21218 llvm_unreachable("Unexpected builtin ID.");
21219 }
21220#undef MMA_VARIANTS
21221#undef MMA_SATF_VARIANTS
21222#undef MMA_VARIANTS_I4
21223#undef MMA_VARIANTS_B1_AND
21224#undef MMA_VARIANTS_B1_XOR
21225}
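// ---------------------------------------------------------------------------
// Illustrative example (not part of the upstream file): a worked instance of
// getMMAIntrinsic's indexing, Index = Layout + 4 * Satf, against the
// MMA_SATF_VARIANTS ordering above (row_row = 0, row_col = 1, col_row = 2,
// col_col = 3; the .satfinite variants occupy indices 4..7).
//
//   NVPTXMmaInfo MI = getNVPTXMmaInfo(NVPTX::BI__hmma_m16n16k16_mma_f32f32);
//   unsigned RowCol = MI.getMMAIntrinsic(/*Layout=*/1, /*Satf=*/false);
//   // -> Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f32_f32            (index 1)
//   unsigned RowColSat = MI.getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true);
//   // -> Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_f32_f32_satfinite  (index 5)
// ---------------------------------------------------------------------------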
21226
21227static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21228 const CallExpr *E) {
21229 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21230 QualType ArgType = E->getArg(0)->getType();
21231  clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21232  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21233 return CGF.Builder.CreateCall(
21234 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21235 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21236}
21237
21238static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21239 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21240 QualType ArgType = E->getArg(0)->getType();
21241  clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21242  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21243
21244 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21245 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21246 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21247 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21248 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21249
21250 return LD;
21251}
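// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the upstream file): the rough IR shape
// MakeLdg produces for a scalar float load. Value names are invented, and the
// alignment shown assumes the natural 4-byte alignment of 'float'.
//
//   %g = addrspacecast ptr %p to ptr addrspace(1)
//   %v = load float, ptr addrspace(1) %g, align 4, !invariant.load !0
//
// The addrspacecast targets NVPTX's global address space (1), and the empty
// !invariant.load metadata tells LLVM the loaded location is not written while
// the program can observe it, enabling more aggressive reordering and CSE.
// ---------------------------------------------------------------------------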
21252
21253static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21254 const CallExpr *E) {
21255 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21256 llvm::Type *ElemTy =
21257 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21258 return CGF.Builder.CreateCall(
21259 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21260 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21261}
21262
21263static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21264 CodeGenFunction &CGF, const CallExpr *E,
21265 int SrcSize) {
21266 return E->getNumArgs() == 3
21267 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21268 {CGF.EmitScalarExpr(E->getArg(0)),
21269 CGF.EmitScalarExpr(E->getArg(1)),
21270 CGF.EmitScalarExpr(E->getArg(2))})
21271 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21272 {CGF.EmitScalarExpr(E->getArg(0)),
21273 CGF.EmitScalarExpr(E->getArg(1))});
21274}
21275
21276static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21277 const CallExpr *E, CodeGenFunction &CGF) {
21278 auto &C = CGF.CGM.getContext();
21279 if (!(C.getLangOpts().NativeHalfType ||
21280 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21281 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21282 " requires native half type support.");
21283 return nullptr;
21284 }
21285
21286 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21287 return MakeLdg(CGF, E);
21288
21289 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21290 return MakeLdu(IntrinsicID, CGF, E);
21291
21292  SmallVector<Value *, 16> Args;
21293  auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21294 auto *FTy = F->getFunctionType();
21295 unsigned ICEArguments = 0;
21296  ASTContext::GetBuiltinTypeError Error;
21297  C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21298 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21299 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21300 assert((ICEArguments & (1 << i)) == 0);
21301 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21302 auto *PTy = FTy->getParamType(i);
21303 if (PTy != ArgValue->getType())
21304 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21305 Args.push_back(ArgValue);
21306 }
21307
21308 return CGF.Builder.CreateCall(F, Args);
21309}
21310} // namespace
21311
21312Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21313                                             const CallExpr *E) {
21314 switch (BuiltinID) {
21315 case NVPTX::BI__nvvm_atom_add_gen_i:
21316 case NVPTX::BI__nvvm_atom_add_gen_l:
21317 case NVPTX::BI__nvvm_atom_add_gen_ll:
21318 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21319
21320 case NVPTX::BI__nvvm_atom_sub_gen_i:
21321 case NVPTX::BI__nvvm_atom_sub_gen_l:
21322 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21323 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21324
21325 case NVPTX::BI__nvvm_atom_and_gen_i:
21326 case NVPTX::BI__nvvm_atom_and_gen_l:
21327 case NVPTX::BI__nvvm_atom_and_gen_ll:
21328 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21329
21330 case NVPTX::BI__nvvm_atom_or_gen_i:
21331 case NVPTX::BI__nvvm_atom_or_gen_l:
21332 case NVPTX::BI__nvvm_atom_or_gen_ll:
21333 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21334
21335 case NVPTX::BI__nvvm_atom_xor_gen_i:
21336 case NVPTX::BI__nvvm_atom_xor_gen_l:
21337 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21338 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21339
21340 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21341 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21342 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21343 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21344
21345 case NVPTX::BI__nvvm_atom_max_gen_i:
21346 case NVPTX::BI__nvvm_atom_max_gen_l:
21347 case NVPTX::BI__nvvm_atom_max_gen_ll:
21348 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21349
21350 case NVPTX::BI__nvvm_atom_max_gen_ui:
21351 case NVPTX::BI__nvvm_atom_max_gen_ul:
21352 case NVPTX::BI__nvvm_atom_max_gen_ull:
21353 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21354
21355 case NVPTX::BI__nvvm_atom_min_gen_i:
21356 case NVPTX::BI__nvvm_atom_min_gen_l:
21357 case NVPTX::BI__nvvm_atom_min_gen_ll:
21358 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21359
21360 case NVPTX::BI__nvvm_atom_min_gen_ui:
21361 case NVPTX::BI__nvvm_atom_min_gen_ul:
21362 case NVPTX::BI__nvvm_atom_min_gen_ull:
21363 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21364
21365 case NVPTX::BI__nvvm_atom_cas_gen_us:
21366 case NVPTX::BI__nvvm_atom_cas_gen_i:
21367 case NVPTX::BI__nvvm_atom_cas_gen_l:
21368 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21369 // __nvvm_atom_cas_gen_* should return the old value rather than the
21370 // success flag.
21371 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21372
21373 case NVPTX::BI__nvvm_atom_add_gen_f:
21374 case NVPTX::BI__nvvm_atom_add_gen_d: {
21375 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21376 Value *Val = EmitScalarExpr(E->getArg(1));
21377
21378 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21379 AtomicOrdering::SequentiallyConsistent);
21380 }
21381
21382 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21383 Value *Ptr = EmitScalarExpr(E->getArg(0));
21384 Value *Val = EmitScalarExpr(E->getArg(1));
21385 Function *FnALI32 =
21386 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21387 return Builder.CreateCall(FnALI32, {Ptr, Val});
21388 }
21389
21390 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21391 Value *Ptr = EmitScalarExpr(E->getArg(0));
21392 Value *Val = EmitScalarExpr(E->getArg(1));
21393 Function *FnALD32 =
21394 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21395 return Builder.CreateCall(FnALD32, {Ptr, Val});
21396 }
21397
21398 case NVPTX::BI__nvvm_ldg_c:
21399 case NVPTX::BI__nvvm_ldg_sc:
21400 case NVPTX::BI__nvvm_ldg_c2:
21401 case NVPTX::BI__nvvm_ldg_sc2:
21402 case NVPTX::BI__nvvm_ldg_c4:
21403 case NVPTX::BI__nvvm_ldg_sc4:
21404 case NVPTX::BI__nvvm_ldg_s:
21405 case NVPTX::BI__nvvm_ldg_s2:
21406 case NVPTX::BI__nvvm_ldg_s4:
21407 case NVPTX::BI__nvvm_ldg_i:
21408 case NVPTX::BI__nvvm_ldg_i2:
21409 case NVPTX::BI__nvvm_ldg_i4:
21410 case NVPTX::BI__nvvm_ldg_l:
21411 case NVPTX::BI__nvvm_ldg_l2:
21412 case NVPTX::BI__nvvm_ldg_ll:
21413 case NVPTX::BI__nvvm_ldg_ll2:
21414 case NVPTX::BI__nvvm_ldg_uc:
21415 case NVPTX::BI__nvvm_ldg_uc2:
21416 case NVPTX::BI__nvvm_ldg_uc4:
21417 case NVPTX::BI__nvvm_ldg_us:
21418 case NVPTX::BI__nvvm_ldg_us2:
21419 case NVPTX::BI__nvvm_ldg_us4:
21420 case NVPTX::BI__nvvm_ldg_ui:
21421 case NVPTX::BI__nvvm_ldg_ui2:
21422 case NVPTX::BI__nvvm_ldg_ui4:
21423 case NVPTX::BI__nvvm_ldg_ul:
21424 case NVPTX::BI__nvvm_ldg_ul2:
21425 case NVPTX::BI__nvvm_ldg_ull:
21426 case NVPTX::BI__nvvm_ldg_ull2:
21427 case NVPTX::BI__nvvm_ldg_f:
21428 case NVPTX::BI__nvvm_ldg_f2:
21429 case NVPTX::BI__nvvm_ldg_f4:
21430 case NVPTX::BI__nvvm_ldg_d:
21431 case NVPTX::BI__nvvm_ldg_d2:
21432 // PTX Interoperability section 2.2: "For a vector with an even number of
21433 // elements, its alignment is set to number of elements times the alignment
21434 // of its member: n*alignof(t)."
21435 return MakeLdg(*this, E);
21436
21437 case NVPTX::BI__nvvm_ldu_c:
21438 case NVPTX::BI__nvvm_ldu_sc:
21439 case NVPTX::BI__nvvm_ldu_c2:
21440 case NVPTX::BI__nvvm_ldu_sc2:
21441 case NVPTX::BI__nvvm_ldu_c4:
21442 case NVPTX::BI__nvvm_ldu_sc4:
21443 case NVPTX::BI__nvvm_ldu_s:
21444 case NVPTX::BI__nvvm_ldu_s2:
21445 case NVPTX::BI__nvvm_ldu_s4:
21446 case NVPTX::BI__nvvm_ldu_i:
21447 case NVPTX::BI__nvvm_ldu_i2:
21448 case NVPTX::BI__nvvm_ldu_i4:
21449 case NVPTX::BI__nvvm_ldu_l:
21450 case NVPTX::BI__nvvm_ldu_l2:
21451 case NVPTX::BI__nvvm_ldu_ll:
21452 case NVPTX::BI__nvvm_ldu_ll2:
21453 case NVPTX::BI__nvvm_ldu_uc:
21454 case NVPTX::BI__nvvm_ldu_uc2:
21455 case NVPTX::BI__nvvm_ldu_uc4:
21456 case NVPTX::BI__nvvm_ldu_us:
21457 case NVPTX::BI__nvvm_ldu_us2:
21458 case NVPTX::BI__nvvm_ldu_us4:
21459 case NVPTX::BI__nvvm_ldu_ui:
21460 case NVPTX::BI__nvvm_ldu_ui2:
21461 case NVPTX::BI__nvvm_ldu_ui4:
21462 case NVPTX::BI__nvvm_ldu_ul:
21463 case NVPTX::BI__nvvm_ldu_ul2:
21464 case NVPTX::BI__nvvm_ldu_ull:
21465 case NVPTX::BI__nvvm_ldu_ull2:
21466 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21467 case NVPTX::BI__nvvm_ldu_f:
21468 case NVPTX::BI__nvvm_ldu_f2:
21469 case NVPTX::BI__nvvm_ldu_f4:
21470 case NVPTX::BI__nvvm_ldu_d:
21471 case NVPTX::BI__nvvm_ldu_d2:
21472 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21473
21474 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21475 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21476 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21477 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21478 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21479 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21480 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21481 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21482 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21483 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21484 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21485 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21486 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21487 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21488 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21489 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21490 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21491 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21492 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21493 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21494 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21495 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21496 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21497 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21498 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21499 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21500 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21501 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21502 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21503 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21504 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21505 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21506 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21507 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21508 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21509 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21510 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21511 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21512 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21513 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21514 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21515 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21516 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21517 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21518 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21519 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21520 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21521 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21522 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21523 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21524 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21525 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21526 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21527 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21528 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21529 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21530 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21531 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21532 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21533 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21534 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21535 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21536 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21537 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21538 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21539 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21540 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21541 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21542 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21543 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21544 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21545 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21546 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21547 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21548 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21549 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21550 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21551 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21552 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21553 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21554 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21555 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21556 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21557 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21558 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21559 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21560 Value *Ptr = EmitScalarExpr(E->getArg(0));
21561 llvm::Type *ElemTy =
21562 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21563 return Builder.CreateCall(
21564        CGM.getIntrinsic(
21565            Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21566 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21567 }
21568 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21569 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21570 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21571 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21572 Value *Ptr = EmitScalarExpr(E->getArg(0));
21573 llvm::Type *ElemTy =
21574 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21575 return Builder.CreateCall(
21576        CGM.getIntrinsic(
21577            Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21578 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21579 }
21580 case NVPTX::BI__nvvm_match_all_sync_i32p:
21581 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21582 Value *Mask = EmitScalarExpr(E->getArg(0));
21583 Value *Val = EmitScalarExpr(E->getArg(1));
21584 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21585 Value *ResultPair = Builder.CreateCall(
21586 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21587 ? Intrinsic::nvvm_match_all_sync_i32p
21588 : Intrinsic::nvvm_match_all_sync_i64p),
21589 {Mask, Val});
21590 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21591 PredOutPtr.getElementType());
21592 Builder.CreateStore(Pred, PredOutPtr);
21593 return Builder.CreateExtractValue(ResultPair, 0);
21594 }
21595
21596 // FP MMA loads
21597 case NVPTX::BI__hmma_m16n16k16_ld_a:
21598 case NVPTX::BI__hmma_m16n16k16_ld_b:
21599 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21600 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21601 case NVPTX::BI__hmma_m32n8k16_ld_a:
21602 case NVPTX::BI__hmma_m32n8k16_ld_b:
21603 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21604 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21605 case NVPTX::BI__hmma_m8n32k16_ld_a:
21606 case NVPTX::BI__hmma_m8n32k16_ld_b:
21607 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21608 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21609 // Integer MMA loads.
21610 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21611 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21612 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21613 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21614 case NVPTX::BI__imma_m16n16k16_ld_c:
21615 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21616 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21617 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21618 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21619 case NVPTX::BI__imma_m32n8k16_ld_c:
21620 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21621 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21622 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21623 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21624 case NVPTX::BI__imma_m8n32k16_ld_c:
21625 // Sub-integer MMA loads.
21626 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21627 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21628 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21629 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21630 case NVPTX::BI__imma_m8n8k32_ld_c:
21631 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21632 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21633 case NVPTX::BI__bmma_m8n8k128_ld_c:
21634 // Double MMA loads.
21635 case NVPTX::BI__dmma_m8n8k4_ld_a:
21636 case NVPTX::BI__dmma_m8n8k4_ld_b:
21637 case NVPTX::BI__dmma_m8n8k4_ld_c:
21638 // Alternate float MMA loads.
21639 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21640 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21641 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21642 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21643 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21644 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21645 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21646 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21647 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21648 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21649 Value *Src = EmitScalarExpr(E->getArg(1));
21650 Value *Ldm = EmitScalarExpr(E->getArg(2));
21651 std::optional<llvm::APSInt> isColMajorArg =
21652 E->getArg(3)->getIntegerConstantExpr(getContext());
21653 if (!isColMajorArg)
21654 return nullptr;
21655 bool isColMajor = isColMajorArg->getSExtValue();
21656 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21657 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21658 if (IID == 0)
21659 return nullptr;
21660
21661 Value *Result =
21662 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21663
21664 // Save returned values.
21665 assert(II.NumResults);
21666 if (II.NumResults == 1) {
21667       Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21668                                  CharUnits::fromQuantity(4));
21669     } else {
21670       for (unsigned i = 0; i < II.NumResults; ++i) {
21671         Builder.CreateAlignedStore(
21672             Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21673                                   Dst.getElementType()),
21674             Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21675                               llvm::ConstantInt::get(IntTy, i)),
21676             CharUnits::fromQuantity(4));
21677       }
21678     }
21679 return Result;
21680 }
21681
21682 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21683 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21684 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21685 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21686 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21687 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21688 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21689 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21690 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21691 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21692 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21693 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21694 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21695 Value *Dst = EmitScalarExpr(E->getArg(0));
21696 Address Src = EmitPointerWithAlignment(E->getArg(1));
21697 Value *Ldm = EmitScalarExpr(E->getArg(2));
21698 std::optional<llvm::APSInt> isColMajorArg =
21699 E->getArg(3)->getIntegerConstantExpr(getContext());
21700 if (!isColMajorArg)
21701 return nullptr;
21702 bool isColMajor = isColMajorArg->getSExtValue();
21703 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21704 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21705 if (IID == 0)
21706 return nullptr;
21707 Function *Intrinsic =
21708 CGM.getIntrinsic(IID, Dst->getType());
21709 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21710 SmallVector<Value *, 10> Values = {Dst};
21711 for (unsigned i = 0; i < II.NumResults; ++i) {
21712       Value *V = Builder.CreateAlignedLoad(
21713           Src.getElementType(),
21714           Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21715                             llvm::ConstantInt::get(IntTy, i)),
21716           CharUnits::fromQuantity(4));
21717       Values.push_back(Builder.CreateBitCast(V, ParamType));
21718 }
21719 Values.push_back(Ldm);
21720 Value *Result = Builder.CreateCall(Intrinsic, Values);
21721 return Result;
21722 }
21723
21724 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21725 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21726 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21727 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21728 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21729 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21730 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21731 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21732 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21733 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21734 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21735 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21736 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21737 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21738 case NVPTX::BI__imma_m16n16k16_mma_s8:
21739 case NVPTX::BI__imma_m16n16k16_mma_u8:
21740 case NVPTX::BI__imma_m32n8k16_mma_s8:
21741 case NVPTX::BI__imma_m32n8k16_mma_u8:
21742 case NVPTX::BI__imma_m8n32k16_mma_s8:
21743 case NVPTX::BI__imma_m8n32k16_mma_u8:
21744 case NVPTX::BI__imma_m8n8k32_mma_s4:
21745 case NVPTX::BI__imma_m8n8k32_mma_u4:
21746 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21747 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21748 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21749 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21750 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21751 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21752 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21753 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21754 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21755 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21756 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21757 std::optional<llvm::APSInt> LayoutArg =
21758 E->getArg(4)->getIntegerConstantExpr(getContext());
21759 if (!LayoutArg)
21760 return nullptr;
21761 int Layout = LayoutArg->getSExtValue();
21762 if (Layout < 0 || Layout > 3)
21763 return nullptr;
21764 llvm::APSInt SatfArg;
21765 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21766 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21767 SatfArg = 0; // .b1 does not have satf argument.
21768 else if (std::optional<llvm::APSInt> OptSatfArg =
21769 E->getArg(5)->getIntegerConstantExpr(getContext()))
21770 SatfArg = *OptSatfArg;
21771 else
21772 return nullptr;
21773 bool Satf = SatfArg.getSExtValue();
21774 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21775 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21776 if (IID == 0) // Unsupported combination of Layout/Satf.
21777 return nullptr;
21778
21779 SmallVector<Value *, 24> Values;
21780 Function *Intrinsic = CGM.getIntrinsic(IID);
21781 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21782 // Load A
21783 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21784 Value *V = Builder.CreateAlignedLoad(
21785 SrcA.getElementType(),
21786 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21787 llvm::ConstantInt::get(IntTy, i)),
21788 CharUnits::fromQuantity(4));
21789 Values.push_back(Builder.CreateBitCast(V, AType));
21790 }
21791 // Load B
21792 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21793 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21794 Value *V = Builder.CreateAlignedLoad(
21795 SrcB.getElementType(),
21796 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21797 llvm::ConstantInt::get(IntTy, i)),
21798 CharUnits::fromQuantity(4));
21799 Values.push_back(Builder.CreateBitCast(V, BType));
21800 }
21801 // Load C
21802 llvm::Type *CType =
21803 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21804 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21805 Value *V = Builder.CreateAlignedLoad(
21806 SrcC.getElementType(),
21807 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21808 llvm::ConstantInt::get(IntTy, i)),
21809 CharUnits::fromQuantity(4));
21810 Values.push_back(Builder.CreateBitCast(V, CType));
21811 }
21812 Value *Result = Builder.CreateCall(Intrinsic, Values);
21813 llvm::Type *DType = Dst.getElementType();
21814 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21815 Builder.CreateAlignedStore(
21816 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21817 Builder.CreateGEP(DType, Dst.emitRawPointer(*this),
21818 llvm::ConstantInt::get(IntTy, i)),
21819 CharUnits::fromQuantity(4));
21820 return Result;
21821 }
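 // Note on the MMA compute lowering above: the constant layout argument
 // (0..3) and the optional satf flag select one of the NVVM MMA intrinsic
 // variants via getNVPTXMmaInfo, the A, B and C fragments are loaded element
 // by element and bitcast to the intrinsic's parameter types, and the
 // elements of the returned aggregate are written back into the D fragment.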
21822 // The following builtins require half type support
21823 case NVPTX::BI__nvvm_ex2_approx_f16:
21824 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21825 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21826 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21827 case NVPTX::BI__nvvm_ff2f16x2_rn:
21828 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21829 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21830 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21831 case NVPTX::BI__nvvm_ff2f16x2_rz:
21832 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21833 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21834 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21835 case NVPTX::BI__nvvm_fma_rn_f16:
21836 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21837 case NVPTX::BI__nvvm_fma_rn_f16x2:
21838 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21839 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21840 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21841 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21842 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21843 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21844 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21845 *this);
21846 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21847 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21848 *this);
21849 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21850 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21851 *this);
21852 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21853 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21854 *this);
21855 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21856 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21857 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21858 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21859 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21860 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21861 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21862 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21863 case NVPTX::BI__nvvm_fmax_f16:
21864 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21865 case NVPTX::BI__nvvm_fmax_f16x2:
21866 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21867 case NVPTX::BI__nvvm_fmax_ftz_f16:
21868 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21869 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21870 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21871 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21872 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21873 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21874 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21875 *this);
21876 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21877 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21878 E, *this);
21879 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21880 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21881 BuiltinID, E, *this);
21882 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21883 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21884 *this);
21885 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21886 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21887 E, *this);
21888 case NVPTX::BI__nvvm_fmax_nan_f16:
21889 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21890 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21891 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21892 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21893 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21894 *this);
21895 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21896 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21897 E, *this);
21898 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21899 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21900 *this);
21901 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21902 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21903 *this);
21904 case NVPTX::BI__nvvm_fmin_f16:
21905 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21906 case NVPTX::BI__nvvm_fmin_f16x2:
21907 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21908 case NVPTX::BI__nvvm_fmin_ftz_f16:
21909 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21910 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21911 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21912 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21913 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21914 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21915 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21916 *this);
21917 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21918 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21919 E, *this);
21920 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21921 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21922 BuiltinID, E, *this);
21923 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21924 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
21925 *this);
21926 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21927 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21928 E, *this);
21929 case NVPTX::BI__nvvm_fmin_nan_f16:
21930 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
21931 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21932 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
21933 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21934 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
21935 *this);
21936 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21937 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21938 E, *this);
21939 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21940 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
21941 *this);
21942 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21943 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
21944 *this);
21945 case NVPTX::BI__nvvm_ldg_h:
21946 case NVPTX::BI__nvvm_ldg_h2:
21947 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
21948 case NVPTX::BI__nvvm_ldu_h:
21949 case NVPTX::BI__nvvm_ldu_h2:
21950 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21951 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21952 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21953 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
21954 4);
21955 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21956 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21957 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
21958 8);
21959 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21960 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21961 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
21962 16);
21963 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21964 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21965 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
21966 16);
21967 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21968 return Builder.CreateCall(
21969 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
21970 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21971 return Builder.CreateCall(
21972 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
21973 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21974 return Builder.CreateCall(
21975 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
21976 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21977 return Builder.CreateCall(
21978 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
21979 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21980 return Builder.CreateCall(
21981 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
21982 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21983 return Builder.CreateCall(
21984 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
21985 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21986 return Builder.CreateCall(
21987 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
21988 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21989 return Builder.CreateCall(
21990 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
21991 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
21992 return Builder.CreateCall(
21993 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
21994 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
21995 return Builder.CreateCall(
21996 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
21997 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
21998 return Builder.CreateCall(
21999 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
22000 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
22001 return Builder.CreateCall(
22002 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
22003 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
22004 return Builder.CreateCall(
22005 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
22006 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
22007 return Builder.CreateCall(
22008 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
22009 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
22010 return Builder.CreateCall(
22011 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
22012 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
22013 return Builder.CreateCall(
22014 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
22015 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
22016 return Builder.CreateCall(
22017 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
22018 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
22019 return Builder.CreateCall(
22020 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
22021 case NVPTX::BI__nvvm_is_explicit_cluster:
22022 return Builder.CreateCall(
22023 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
22024 case NVPTX::BI__nvvm_isspacep_shared_cluster:
22025 return Builder.CreateCall(
22026 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
22027 EmitScalarExpr(E->getArg(0)));
22028 case NVPTX::BI__nvvm_mapa:
22029 return Builder.CreateCall(
22030 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
22031 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22032 case NVPTX::BI__nvvm_mapa_shared_cluster:
22033 return Builder.CreateCall(
22034 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
22035 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22036 case NVPTX::BI__nvvm_getctarank:
22037 return Builder.CreateCall(
22038 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
22039 EmitScalarExpr(E->getArg(0)));
22040 case NVPTX::BI__nvvm_getctarank_shared_cluster:
22041 return Builder.CreateCall(
22042 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
22043 EmitScalarExpr(E->getArg(0)));
22044 case NVPTX::BI__nvvm_barrier_cluster_arrive:
22045 return Builder.CreateCall(
22046 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
22047 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
22048 return Builder.CreateCall(
22049 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
22050 case NVPTX::BI__nvvm_barrier_cluster_wait:
22051 return Builder.CreateCall(
22052 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
22053 case NVPTX::BI__nvvm_fence_sc_cluster:
22054 return Builder.CreateCall(
22055 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
22056 default:
22057 return nullptr;
22058 }
22059}
22060
22061namespace {
22062struct BuiltinAlignArgs {
22063 llvm::Value *Src = nullptr;
22064 llvm::Type *SrcType = nullptr;
22065 llvm::Value *Alignment = nullptr;
22066 llvm::Value *Mask = nullptr;
22067 llvm::IntegerType *IntType = nullptr;
22068
22069 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22070 QualType AstType = E->getArg(0)->getType();
22071 if (AstType->isArrayType())
22072 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22073 else
22074 Src = CGF.EmitScalarExpr(E->getArg(0));
22075 SrcType = Src->getType();
22076 if (SrcType->isPointerTy()) {
22077 IntType = IntegerType::get(
22078 CGF.getLLVMContext(),
22079 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22080 } else {
22081 assert(SrcType->isIntegerTy());
22082 IntType = cast<llvm::IntegerType>(SrcType);
22083 }
22084 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22085 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22086 auto *One = llvm::ConstantInt::get(IntType, 1);
22087 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22088 }
22089};
22090} // namespace
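// BuiltinAlignArgs normalizes the first argument of the alignment builtins:
// arrays decay to pointers, pointer sources use the target's pointer-index
// width as the integer type, and the requested alignment is zero-extended or
// truncated to that type so that Mask = Alignment - 1. For example, an
// alignment of 16 produces the mask 15 (0b1111).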
22091
22092/// Generate (x & (y-1)) == 0.
22093RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22094 BuiltinAlignArgs Args(E, *this);
22095 llvm::Value *SrcAddress = Args.Src;
22096 if (Args.SrcType->isPointerTy())
22097 SrcAddress =
22098 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22099 return RValue::get(Builder.CreateICmpEQ(
22100 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22101 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22102}
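// Worked example for the check above: for __builtin_is_aligned(p, 16) the
// pointer is cast to its index-sized integer and masked with 15, so an
// address of 0x1008 gives 0x1008 & 0xF == 0x8 (not aligned) while 0x1010
// gives 0 (aligned).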
22103
22104/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22105/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22106/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22107RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22108 BuiltinAlignArgs Args(E, *this);
22109 llvm::Value *SrcForMask = Args.Src;
22110 if (AlignUp) {
22111 // When aligning up we have to first add the mask to ensure we go over the
22112 // next alignment value and then align down to the next valid multiple.
22113 // By adding the mask, we ensure that align_up on an already aligned
22114 // value will not change the value.
22115 if (Args.Src->getType()->isPointerTy()) {
22116 if (getLangOpts().isSignedOverflowDefined())
22117 SrcForMask =
22118 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22119 else
22120 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22121 /*SignedIndices=*/true,
22122 /*isSubtraction=*/false,
22123 E->getExprLoc(), "over_boundary");
22124 } else {
22125 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22126 }
22127 }
22128 // Invert the mask to only clear the lower bits.
22129 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22130 llvm::Value *Result = nullptr;
22131 if (Args.Src->getType()->isPointerTy()) {
22132 Result = Builder.CreateIntrinsic(
22133 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22134 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22135 } else {
22136 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22137 }
22138 assert(Result->getType() == Args.SrcType);
22139 return RValue::get(Result);
22140}
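// Worked example for the align-up path above with alignment 8 (mask 7): an
// address of 13 becomes 13 + 7 == 20 and is masked with ~7 down to 16, while
// an already aligned 16 becomes 23 and masks back to 16, so aligned inputs
// are unchanged. For pointer sources the final masking is expressed with
// llvm.ptrmask rather than a ptrtoint/inttoptr pair.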
22141
22142Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22143 const CallExpr *E) {
22144 switch (BuiltinID) {
22145 case WebAssembly::BI__builtin_wasm_memory_size: {
22146 llvm::Type *ResultType = ConvertType(E->getType());
22147 Value *I = EmitScalarExpr(E->getArg(0));
22148 Function *Callee =
22149 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22150 return Builder.CreateCall(Callee, I);
22151 }
22152 case WebAssembly::BI__builtin_wasm_memory_grow: {
22153 llvm::Type *ResultType = ConvertType(E->getType());
22154 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22155 EmitScalarExpr(E->getArg(1))};
22156 Function *Callee =
22157 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22158 return Builder.CreateCall(Callee, Args);
22159 }
22160 case WebAssembly::BI__builtin_wasm_tls_size: {
22161 llvm::Type *ResultType = ConvertType(E->getType());
22162 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22163 return Builder.CreateCall(Callee);
22164 }
22165 case WebAssembly::BI__builtin_wasm_tls_align: {
22166 llvm::Type *ResultType = ConvertType(E->getType());
22167 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22168 return Builder.CreateCall(Callee);
22169 }
22170 case WebAssembly::BI__builtin_wasm_tls_base: {
22171 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22172 return Builder.CreateCall(Callee);
22173 }
22174 case WebAssembly::BI__builtin_wasm_throw: {
22175 Value *Tag = EmitScalarExpr(E->getArg(0));
22176 Value *Obj = EmitScalarExpr(E->getArg(1));
22177 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22178 return Builder.CreateCall(Callee, {Tag, Obj});
22179 }
22180 case WebAssembly::BI__builtin_wasm_rethrow: {
22181 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22182 return Builder.CreateCall(Callee);
22183 }
22184 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22185 Value *Addr = EmitScalarExpr(E->getArg(0));
22186 Value *Expected = EmitScalarExpr(E->getArg(1));
22187 Value *Timeout = EmitScalarExpr(E->getArg(2));
22188 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22189 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22190 }
22191 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22192 Value *Addr = EmitScalarExpr(E->getArg(0));
22193 Value *Expected = EmitScalarExpr(E->getArg(1));
22194 Value *Timeout = EmitScalarExpr(E->getArg(2));
22195 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22196 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22197 }
22198 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22199 Value *Addr = EmitScalarExpr(E->getArg(0));
22200 Value *Count = EmitScalarExpr(E->getArg(1));
22201 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22202 return Builder.CreateCall(Callee, {Addr, Count});
22203 }
22204 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22205 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22206 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22207 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22208 Value *Src = EmitScalarExpr(E->getArg(0));
22209 llvm::Type *ResT = ConvertType(E->getType());
22210 Function *Callee =
22211 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22212 return Builder.CreateCall(Callee, {Src});
22213 }
22214 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22215 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22216 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22217 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22218 Value *Src = EmitScalarExpr(E->getArg(0));
22219 llvm::Type *ResT = ConvertType(E->getType());
22220 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22221 {ResT, Src->getType()});
22222 return Builder.CreateCall(Callee, {Src});
22223 }
22224 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22225 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22226 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22227 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22228 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22229 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22230 Value *Src = EmitScalarExpr(E->getArg(0));
22231 llvm::Type *ResT = ConvertType(E->getType());
22232 Function *Callee =
22233 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22234 return Builder.CreateCall(Callee, {Src});
22235 }
22236 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22237 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22238 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22239 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22240 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22241 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22242 Value *Src = EmitScalarExpr(E->getArg(0));
22243 llvm::Type *ResT = ConvertType(E->getType());
22244 Function *Callee =
22245 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22246 return Builder.CreateCall(Callee, {Src});
22247 }
22248 case WebAssembly::BI__builtin_wasm_min_f32:
22249 case WebAssembly::BI__builtin_wasm_min_f64:
22250 case WebAssembly::BI__builtin_wasm_min_f16x8:
22251 case WebAssembly::BI__builtin_wasm_min_f32x4:
22252 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22253 Value *LHS = EmitScalarExpr(E->getArg(0));
22254 Value *RHS = EmitScalarExpr(E->getArg(1));
22255 Function *Callee =
22256 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22257 return Builder.CreateCall(Callee, {LHS, RHS});
22258 }
22259 case WebAssembly::BI__builtin_wasm_max_f32:
22260 case WebAssembly::BI__builtin_wasm_max_f64:
22261 case WebAssembly::BI__builtin_wasm_max_f16x8:
22262 case WebAssembly::BI__builtin_wasm_max_f32x4:
22263 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22264 Value *LHS = EmitScalarExpr(E->getArg(0));
22265 Value *RHS = EmitScalarExpr(E->getArg(1));
22266 Function *Callee =
22267 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22268 return Builder.CreateCall(Callee, {LHS, RHS});
22269 }
22270 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22271 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22272 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22273 Value *LHS = EmitScalarExpr(E->getArg(0));
22274 Value *RHS = EmitScalarExpr(E->getArg(1));
22275 Function *Callee =
22276 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22277 return Builder.CreateCall(Callee, {LHS, RHS});
22278 }
22279 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22280 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22281 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22282 Value *LHS = EmitScalarExpr(E->getArg(0));
22283 Value *RHS = EmitScalarExpr(E->getArg(1));
22284 Function *Callee =
22285 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22286 return Builder.CreateCall(Callee, {LHS, RHS});
22287 }
22288 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22289 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22290 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22291 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22292 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22293 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22294 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22295 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22296 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22297 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22298 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22299 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22300 unsigned IntNo;
22301 switch (BuiltinID) {
22302 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22303 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22304 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22305 IntNo = Intrinsic::ceil;
22306 break;
22307 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22308 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22309 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22310 IntNo = Intrinsic::floor;
22311 break;
22312 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22313 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22314 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22315 IntNo = Intrinsic::trunc;
22316 break;
22317 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22318 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22319 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22320 IntNo = Intrinsic::nearbyint;
22321 break;
22322 default:
22323 llvm_unreachable("unexpected builtin ID");
22324 }
22325 Value *Value = EmitScalarExpr(E->getArg(0));
22326 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22327 return Builder.CreateCall(Callee, Value);
22328 }
22329 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22330 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22331 return Builder.CreateCall(Callee);
22332 }
22333 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22334 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22335 return Builder.CreateCall(Callee);
22336 }
22337 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22338 Value *Src = EmitScalarExpr(E->getArg(0));
22339 Value *Indices = EmitScalarExpr(E->getArg(1));
22340 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22341 return Builder.CreateCall(Callee, {Src, Indices});
22342 }
22343 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22344 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22345 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22346 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22347 Value *Vec = EmitScalarExpr(E->getArg(0));
22348 Value *Neg = Builder.CreateNeg(Vec, "neg");
22349 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22350 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22351 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22352 }
22353 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22354 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22355 Value *LHS = EmitScalarExpr(E->getArg(0));
22356 Value *RHS = EmitScalarExpr(E->getArg(1));
22357 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22358 ConvertType(E->getType()));
22359 return Builder.CreateCall(Callee, {LHS, RHS});
22360 }
22361 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22362 Value *LHS = EmitScalarExpr(E->getArg(0));
22363 Value *RHS = EmitScalarExpr(E->getArg(1));
22364 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22365 return Builder.CreateCall(Callee, {LHS, RHS});
22366 }
22367 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22368 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22369 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22370 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22371 Value *Vec = EmitScalarExpr(E->getArg(0));
22372 unsigned IntNo;
22373 switch (BuiltinID) {
22374 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22375 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22376 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22377 break;
22378 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22379 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22380 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22381 break;
22382 default:
22383 llvm_unreachable("unexpected builtin ID");
22384 }
22385
22386 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22387 return Builder.CreateCall(Callee, Vec);
22388 }
22389 case WebAssembly::BI__builtin_wasm_bitselect: {
22390 Value *V1 = EmitScalarExpr(E->getArg(0));
22391 Value *V2 = EmitScalarExpr(E->getArg(1));
22392 Value *C = EmitScalarExpr(E->getArg(2));
22393 Function *Callee =
22394 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22395 return Builder.CreateCall(Callee, {V1, V2, C});
22396 }
22397 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22398 Value *LHS = EmitScalarExpr(E->getArg(0));
22399 Value *RHS = EmitScalarExpr(E->getArg(1));
22400 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22401 return Builder.CreateCall(Callee, {LHS, RHS});
22402 }
22403 case WebAssembly::BI__builtin_wasm_any_true_v128:
22404 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22405 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22406 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22407 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22408 unsigned IntNo;
22409 switch (BuiltinID) {
22410 case WebAssembly::BI__builtin_wasm_any_true_v128:
22411 IntNo = Intrinsic::wasm_anytrue;
22412 break;
22413 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22414 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22415 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22416 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22417 IntNo = Intrinsic::wasm_alltrue;
22418 break;
22419 default:
22420 llvm_unreachable("unexpected builtin ID");
22421 }
22422 Value *Vec = EmitScalarExpr(E->getArg(0));
22423 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22424 return Builder.CreateCall(Callee, {Vec});
22425 }
22426 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22427 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22428 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22429 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22430 Value *Vec = EmitScalarExpr(E->getArg(0));
22431 Function *Callee =
22432 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22433 return Builder.CreateCall(Callee, {Vec});
22434 }
22435 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22436 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22437 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22438 Value *Vec = EmitScalarExpr(E->getArg(0));
22439 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22440 return Builder.CreateCall(Callee, {Vec});
22441 }
22442 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22443 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22444 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22445 Value *Vec = EmitScalarExpr(E->getArg(0));
22446 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22447 return Builder.CreateCall(Callee, {Vec});
22448 }
22449 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22450 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22451 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22452 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22453 Value *Low = EmitScalarExpr(E->getArg(0));
22454 Value *High = EmitScalarExpr(E->getArg(1));
22455 unsigned IntNo;
22456 switch (BuiltinID) {
22457 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22458 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22459 IntNo = Intrinsic::wasm_narrow_signed;
22460 break;
22461 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22462 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22463 IntNo = Intrinsic::wasm_narrow_unsigned;
22464 break;
22465 default:
22466 llvm_unreachable("unexpected builtin ID");
22467 }
22468 Function *Callee =
22469 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22470 return Builder.CreateCall(Callee, {Low, High});
22471 }
22472 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22473 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22474 Value *Vec = EmitScalarExpr(E->getArg(0));
22475 unsigned IntNo;
22476 switch (BuiltinID) {
22477 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22478 IntNo = Intrinsic::fptosi_sat;
22479 break;
22480 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22481 IntNo = Intrinsic::fptoui_sat;
22482 break;
22483 default:
22484 llvm_unreachable("unexpected builtin ID");
22485 }
22486 llvm::Type *SrcT = Vec->getType();
22487 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22488 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22489 Value *Trunc = Builder.CreateCall(Callee, Vec);
22490 Value *Splat = Constant::getNullValue(TruncT);
22491 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22492 }
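 // Note on the f64x2 saturating truncations above: fptosi.sat/fptoui.sat
 // only yield two i32 lanes, so the result is shuffled with a zero vector
 // (lane indices 0, 1, 2, 3) to form an i32x4 whose upper two lanes are
 // zero, matching the *_zero semantics of the WebAssembly instructions.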
22493 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22494 Value *Ops[18];
22495 size_t OpIdx = 0;
22496 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22497 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22498 while (OpIdx < 18) {
22499 std::optional<llvm::APSInt> LaneConst =
22500 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22501 assert(LaneConst && "Constant arg isn't actually constant?");
22502 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22503 }
22504 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22505 return Builder.CreateCall(Callee, Ops);
22506 }
22507 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22508 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22509 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22510 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22511 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22512 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22513 Value *A = EmitScalarExpr(E->getArg(0));
22514 Value *B = EmitScalarExpr(E->getArg(1));
22515 Value *C = EmitScalarExpr(E->getArg(2));
22516 unsigned IntNo;
22517 switch (BuiltinID) {
22518 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22519 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22520 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22521 IntNo = Intrinsic::wasm_relaxed_madd;
22522 break;
22523 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22524 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22525 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22526 IntNo = Intrinsic::wasm_relaxed_nmadd;
22527 break;
22528 default:
22529 llvm_unreachable("unexpected builtin ID");
22530 }
22531 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22532 return Builder.CreateCall(Callee, {A, B, C});
22533 }
22534 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22535 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22536 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22537 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22538 Value *A = EmitScalarExpr(E->getArg(0));
22539 Value *B = EmitScalarExpr(E->getArg(1));
22540 Value *C = EmitScalarExpr(E->getArg(2));
22541 Function *Callee =
22542 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22543 return Builder.CreateCall(Callee, {A, B, C});
22544 }
22545 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22546 Value *Src = EmitScalarExpr(E->getArg(0));
22547 Value *Indices = EmitScalarExpr(E->getArg(1));
22548 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22549 return Builder.CreateCall(Callee, {Src, Indices});
22550 }
22551 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22552 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22553 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22554 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22555 Value *LHS = EmitScalarExpr(E->getArg(0));
22556 Value *RHS = EmitScalarExpr(E->getArg(1));
22557 unsigned IntNo;
22558 switch (BuiltinID) {
22559 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22560 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22561 IntNo = Intrinsic::wasm_relaxed_min;
22562 break;
22563 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22564 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22565 IntNo = Intrinsic::wasm_relaxed_max;
22566 break;
22567 default:
22568 llvm_unreachable("unexpected builtin ID");
22569 }
22570 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22571 return Builder.CreateCall(Callee, {LHS, RHS});
22572 }
22573 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22574 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22575 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22576 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22577 Value *Vec = EmitScalarExpr(E->getArg(0));
22578 unsigned IntNo;
22579 switch (BuiltinID) {
22580 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22581 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22582 break;
22583 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22584 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22585 break;
22586 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22587 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22588 break;
22589 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22590 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22591 break;
22592 default:
22593 llvm_unreachable("unexpected builtin ID");
22594 }
22595 Function *Callee = CGM.getIntrinsic(IntNo);
22596 return Builder.CreateCall(Callee, {Vec});
22597 }
22598 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22599 Value *LHS = EmitScalarExpr(E->getArg(0));
22600 Value *RHS = EmitScalarExpr(E->getArg(1));
22601 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22602 return Builder.CreateCall(Callee, {LHS, RHS});
22603 }
22604 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22605 Value *LHS = EmitScalarExpr(E->getArg(0));
22606 Value *RHS = EmitScalarExpr(E->getArg(1));
22607 Function *Callee =
22608 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22609 return Builder.CreateCall(Callee, {LHS, RHS});
22610 }
22611 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22612 Value *LHS = EmitScalarExpr(E->getArg(0));
22613 Value *RHS = EmitScalarExpr(E->getArg(1));
22614 Value *Acc = EmitScalarExpr(E->getArg(2));
22615 Function *Callee =
22616 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22617 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22618 }
22619 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22620 Value *LHS = EmitScalarExpr(E->getArg(0));
22621 Value *RHS = EmitScalarExpr(E->getArg(1));
22622 Value *Acc = EmitScalarExpr(E->getArg(2));
22623 Function *Callee =
22624 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22625 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22626 }
22627 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22628 Value *Addr = EmitScalarExpr(E->getArg(0));
22629 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22630 return Builder.CreateCall(Callee, {Addr});
22631 }
22632 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22633 Value *Val = EmitScalarExpr(E->getArg(0));
22634 Value *Addr = EmitScalarExpr(E->getArg(1));
22635 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22636 return Builder.CreateCall(Callee, {Val, Addr});
22637 }
22638 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22639 Value *Val = EmitScalarExpr(E->getArg(0));
22640 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22641 return Builder.CreateCall(Callee, {Val});
22642 }
22643 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22644 Value *Vector = EmitScalarExpr(E->getArg(0));
22645 Value *Index = EmitScalarExpr(E->getArg(1));
22646 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22647 return Builder.CreateCall(Callee, {Vector, Index});
22648 }
22649 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22650 Value *Vector = EmitScalarExpr(E->getArg(0));
22651 Value *Index = EmitScalarExpr(E->getArg(1));
22652 Value *Val = EmitScalarExpr(E->getArg(2));
22653 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22654 return Builder.CreateCall(Callee, {Vector, Index, Val});
22655 }
22656 case WebAssembly::BI__builtin_wasm_table_get: {
22657 assert(E->getArg(0)->getType()->isArrayType());
22658 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22659 Value *Index = EmitScalarExpr(E->getArg(1));
22660 Function *Callee;
22661 if (E->getType().isWebAssemblyExternrefType())
22662 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22663 else if (E->getType().isWebAssemblyFuncrefType())
22664 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22665 else
22666 llvm_unreachable(
22667 "Unexpected reference type for __builtin_wasm_table_get");
22668 return Builder.CreateCall(Callee, {Table, Index});
22669 }
22670 case WebAssembly::BI__builtin_wasm_table_set: {
22671 assert(E->getArg(0)->getType()->isArrayType());
22672 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22673 Value *Index = EmitScalarExpr(E->getArg(1));
22674 Value *Val = EmitScalarExpr(E->getArg(2));
22675 Function *Callee;
22676 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22677 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22678 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22679 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22680 else
22681 llvm_unreachable(
22682 "Unexpected reference type for __builtin_wasm_table_set");
22683 return Builder.CreateCall(Callee, {Table, Index, Val});
22684 }
22685 case WebAssembly::BI__builtin_wasm_table_size: {
22686 assert(E->getArg(0)->getType()->isArrayType());
22687 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22688 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22689 return Builder.CreateCall(Callee, Value);
22690 }
22691 case WebAssembly::BI__builtin_wasm_table_grow: {
22692 assert(E->getArg(0)->getType()->isArrayType());
22693 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22694 Value *Val = EmitScalarExpr(E->getArg(1));
22695 Value *NElems = EmitScalarExpr(E->getArg(2));
22696
22697 Function *Callee;
22698 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22699 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22700 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22701 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22702 else
22703 llvm_unreachable(
22704 "Unexpected reference type for __builtin_wasm_table_grow");
22705
22706 return Builder.CreateCall(Callee, {Table, Val, NElems});
22707 }
22708 case WebAssembly::BI__builtin_wasm_table_fill: {
22709 assert(E->getArg(0)->getType()->isArrayType());
22710 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22711 Value *Index = EmitScalarExpr(E->getArg(1));
22712 Value *Val = EmitScalarExpr(E->getArg(2));
22713 Value *NElems = EmitScalarExpr(E->getArg(3));
22714
22715 Function *Callee;
22716 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22717 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22718 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22719 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22720 else
22721 llvm_unreachable(
22722 "Unexpected reference type for __builtin_wasm_table_fill");
22723
22724 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22725 }
22726 case WebAssembly::BI__builtin_wasm_table_copy: {
22727 assert(E->getArg(0)->getType()->isArrayType());
22728 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22729 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22730 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22731 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22732 Value *NElems = EmitScalarExpr(E->getArg(4));
22733
22734 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22735
22736 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22737 }
22738 default:
22739 return nullptr;
22740 }
22741}
22742
22743static std::pair<Intrinsic::ID, unsigned>
22744getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22745 struct Info {
22746 unsigned BuiltinID;
22747 Intrinsic::ID IntrinsicID;
22748 unsigned VecLen;
22749 };
22750 static Info Infos[] = {
22751#define CUSTOM_BUILTIN_MAPPING(x,s) \
22752 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22753 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22754 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22755 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22756 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22757 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22758 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22759 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22760 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22761 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22762 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22763 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22764 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22765 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22766 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22767 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22768 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22769 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22770 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22771 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22772 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22773 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22774 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22775 // Legacy builtins that take a vector in place of a vector predicate.
22776 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22777 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22778 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22779 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22780 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22781 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22782 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22783 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22784#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22785#undef CUSTOM_BUILTIN_MAPPING
22786 };
22787
22788 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22789 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22790 (void)SortOnce;
22791
22792 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22793 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22794 return {Intrinsic::not_intrinsic, 0};
22795
22796 return {F->IntrinsicID, F->VecLen};
22797}
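// The mapping table above is sorted by builtin ID once, on the first call
// (the comma operator in the SortOnce initializer runs llvm::sort), after
// which each lookup is a binary search via llvm::lower_bound; a miss returns
// {Intrinsic::not_intrinsic, 0} so the caller falls back to the generic
// builtin handling.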
22798
22799Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22800 const CallExpr *E) {
22801 Intrinsic::ID ID;
22802 unsigned VecLen;
22803 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22804
22805 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22806 // The base pointer is passed by address, so it needs to be loaded.
22807 Address A = EmitPointerWithAlignment(E->getArg(0));
22808 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
22809 llvm::Value *Base = Builder.CreateLoad(BP);
22810 // The treatment of both loads and stores is the same: the arguments for
22811 // the builtin are the same as the arguments for the intrinsic.
22812 // Load:
22813 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22814 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22815 // Store:
22816 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22817 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22818 SmallVector<llvm::Value*,5> Ops = { Base };
22819 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22820 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22821
22822 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22823 // The load intrinsics generate two results (Value, NewBase), stores
22824 // generate one (NewBase). The new base address needs to be stored.
22825 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22826 : Result;
22827 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22828 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22829 llvm::Value *RetVal =
22830 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22831 if (IsLoad)
22832 RetVal = Builder.CreateExtractValue(Result, 0);
22833 return RetVal;
22834 };
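 // In short, MakeCircOp forwards the builtin arguments to the circular
 // load/store intrinsic, writes the post-incremented base address back
 // through the pointer passed as the first builtin argument, and returns the
 // loaded value for loads or the new base for stores.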
22835
22836 // Handle the conversion of bit-reverse load intrinsics to bitcode.
22837 // The intrinsic call after this function only reads from memory and the
22838 // write to memory is handled by the store instruction.
22839 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22840 // The intrinsic generates one result, which is the new value for the base
22841 // pointer. It needs to be returned. The result of the load instruction is
22842 // passed to the intrinsic by address, so the value needs to be stored.
22843 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22844
22845 // Expressions like &(*pt++) will be incremented per evaluation.
22846 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
22847 // per call.
22848 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22849 DestAddr = DestAddr.withElementType(Int8Ty);
22850 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22851
22852 // Operands are Base, Dest, Modifier.
22853 // The intrinsic format in LLVM IR is defined as
22854 // { ValueType, i8* } (i8*, i32).
22855 llvm::Value *Result = Builder.CreateCall(
22856 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22857
22858 // The value needs to be stored as the variable is passed by reference.
22859 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22860
22861 // The store needs to be truncated to fit the destination type.
22862 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
22863 // to be handled with stores of the respective destination type.
22864 DestVal = Builder.CreateTrunc(DestVal, DestTy);
22865
22866 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
22867 // The updated value of the base pointer is returned.
22868 return Builder.CreateExtractValue(Result, 1);
22869 };
22870
22871 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
22872 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22873 : Intrinsic::hexagon_V6_vandvrt;
22874 return Builder.CreateCall(CGM.getIntrinsic(ID),
22875 {Vec, Builder.getInt32(-1)});
22876 };
22877 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
22878 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22879 : Intrinsic::hexagon_V6_vandqrt;
22880 return Builder.CreateCall(CGM.getIntrinsic(ID),
22881 {Pred, Builder.getInt32(-1)});
22882 };
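 // V2Q and Q2V convert between HVX vectors and vector-predicate values using
 // the vandvrt/vandqrt intrinsics with an all-ones scalar mask (-1); the
 // _128B variants are used when the custom mapping above reports a 128-byte
 // vector length.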
22883
22884 switch (BuiltinID) {
22885 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22886 // and the corresponding C/C++ builtins use loads/stores to update
22887 // the predicate.
22888 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
22889 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
22890 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
22891 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
22892 // Get the type from the 0-th argument.
22893 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22894 Address PredAddr =
22895 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22896 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
22897 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22898 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
22899
22900 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22901 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22902 PredAddr.getAlignment());
22903 return Builder.CreateExtractValue(Result, 0);
22904 }
22905 // These are identical to the builtins above, except they don't consume
22906 // input carry, only generate carry-out. Since they still produce two
22907 // outputs, generate the store of the predicate, but no load.
22908 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
22909 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
22910 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
22911 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
22912 // Get the type from the 0-th argument.
22913 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22914 Address PredAddr =
22915 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22916 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22917 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22918
22919 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22920 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22921 PredAddr.getAlignment());
22922 return Builder.CreateExtractValue(Result, 0);
22923 }
22924
22925 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
22926 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
22927 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
22928 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
22929 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
22930 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
22931 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
22932 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
22933 SmallVector<llvm::Value*,4> Ops;
22934 const Expr *PredOp = E->getArg(0);
22935 // There will be an implicit cast to a boolean vector. Strip it.
22936 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
22937 if (Cast->getCastKind() == CK_BitCast)
22938 PredOp = Cast->getSubExpr();
22939 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
22940 }
22941 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
22942 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22943 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
22944 }
22945
22946 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
22947 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
22948 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
22949 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
22950 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
22951 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
22952 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
22953 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
22954 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
22955 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
22956 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
22957 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
22958 return MakeCircOp(ID, /*IsLoad=*/true);
22959 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
22960 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
22961 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
22962 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
22963 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
22964 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
22965 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
22966 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22967 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22968 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22969 return MakeCircOp(ID, /*IsLoad=*/false);
22970 case Hexagon::BI__builtin_brev_ldub:
22971 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22972 case Hexagon::BI__builtin_brev_ldb:
22973 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22974 case Hexagon::BI__builtin_brev_lduh:
22975 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22976 case Hexagon::BI__builtin_brev_ldh:
22977 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22978 case Hexagon::BI__builtin_brev_ldw:
22979 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22980 case Hexagon::BI__builtin_brev_ldd:
22981 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22982 } // switch
22983
22984 return nullptr;
22985}
22986
22987Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
22988 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
22989 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
22990 return EmitRISCVCpuIs(CPUStr);
22991}
22992
22993Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
22994 llvm::Type *Int32Ty = Builder.getInt32Ty();
22995 llvm::Type *Int64Ty = Builder.getInt64Ty();
22996 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
22997 llvm::Constant *RISCVCPUModel =
22998 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
22999 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
23000
23001 auto loadRISCVCPUID = [&](unsigned Index) {
23002 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
23003 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
23004 Ptr, llvm::MaybeAlign());
23005 return CPUID;
23006 };
23007
23008 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
23009
23010 // Compare mvendorid.
23011 Value *VendorID = loadRISCVCPUID(0);
23012 Value *Result =
23013 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
23014
23015 // Compare marchid.
23016 Value *ArchID = loadRISCVCPUID(1);
23017 Result = Builder.CreateAnd(
23018 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
23019
23020 // Compare mimpid.
23021 Value *ImpID = loadRISCVCPUID(2);
23022 Result = Builder.CreateAnd(
23023 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
23024
23025 return Result;
23026}
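// Illustrative sketch (not from the original source; the concrete ID values
// are assumed): for a CPU string whose llvm::RISCV::CPUModel is, say,
// {MVendorID=0x489, MArchID=0x8000000000000007, MImpID=0x421}, the function
// above emits roughly
//   %vendor = load i32, ptr @__riscv_cpu_model
//   %c0     = icmp eq i32 %vendor, 0x489
//   %arch   = load i64 from field 1 of @__riscv_cpu_model
//   %c1     = and i1 %c0, (icmp eq i64 %arch, 0x8000000000000007)
//   %imp    = load i64 from field 2 of @__riscv_cpu_model
//   %result = and i1 %c1, (icmp eq i64 %imp, 0x421)
// i.e. all three machine ID registers must match the requested model.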
23027
23028Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
23029 const CallExpr *E,
23030 ReturnValueSlot ReturnValue) {
23031
23032 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
23033 return EmitRISCVCpuSupports(E);
23034 if (BuiltinID == Builtin::BI__builtin_cpu_init)
23035 return EmitRISCVCpuInit();
23036 if (BuiltinID == Builtin::BI__builtin_cpu_is)
23037 return EmitRISCVCpuIs(E);
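// Illustrative sketch (the feature and CPU names are assumed, not from the
// original source): the three generic CPU builtins bypass the switch below;
// source such as
//   if (__builtin_cpu_supports("zba")) { ... }
//   if (__builtin_cpu_is("sifive-u74")) { ... }
// dispatches to EmitRISCVCpuSupports / EmitRISCVCpuIs, which compare against
// runtime-provided globals (such as __riscv_cpu_model above) rather than
// emitting a target intrinsic.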
23038
23039 SmallVector<Value *, 4> Ops;
23040 llvm::Type *ResultType = ConvertType(E->getType());
23041
23042 // Find out if any arguments are required to be integer constant expressions.
23043 unsigned ICEArguments = 0;
23044 ASTContext::GetBuiltinTypeError Error;
23045 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
23046 if (Error == ASTContext::GE_Missing_type) {
23047 // Vector intrinsics don't have a type string.
23048 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
23049 BuiltinID <= clang::RISCV::LastRVVBuiltin);
23050 ICEArguments = 0;
23051 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
23052 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
23053 ICEArguments = 1 << 1;
23054 } else {
23055 assert(Error == ASTContext::GE_None && "Unexpected error");
23056 }
23057
23058 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
23059 ICEArguments |= (1 << 1);
23060 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
23061 ICEArguments |= (1 << 2);
23062
23063 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
23064 // Handle aggregate argument, namely RVV tuple types in segment load/store
23065 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
23066 LValue L = EmitAggExprToLValue(E->getArg(i));
23067 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
23068 Ops.push_back(AggValue);
23069 continue;
23070 }
23071 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
23072 }
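// Illustrative sketch (builtin operands and type names assumed, not from the
// original source): a set bit i in ICEArguments means argument i must be an
// integer constant expression, so EmitScalarOrConstFoldImmArg folds it to a
// ConstantInt instead of emitting an ordinary scalar load; e.g. the domain
// operand of
//   __builtin_riscv_ntl_load(Ptr, 2)
// arrives here as a constant.  RVV tuple-typed operands (segment loads and
// stores, e.g. a vint32m1x2_t value) take the aggregate branch above and are
// passed to the intrinsic as a single loaded aggregate value.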
23073
23074 Intrinsic::ID ID = Intrinsic::not_intrinsic;
23075 // The 0th bit simulates the `vta` of RVV
23076 // The 1st bit simulates the `vma` of RVV
23077 constexpr unsigned RVV_VTA = 0x1;
23078 constexpr unsigned RVV_VMA = 0x2;
23079 int PolicyAttrs = 0;
23080 bool IsMasked = false;
23081 // This is used by segment load/store to determine its LLVM type.
23082 unsigned SegInstSEW = 8;
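// Illustrative sketch (intrinsic naming assumed, not from the original
// source): the generated RVV cases included below fill these in before
// selecting an intrinsic; e.g. a tail-undisturbed variant along the lines of
//   __riscv_vadd_vv_i32m1_tu(Vd, Vs2, Vs1, vl)
// would leave RVV_VTA clear in PolicyAttrs, while the plain unmasked form is
// tail-agnostic (RVV_VTA set); IsMasked and SegInstSEW are likewise set by
// the per-builtin case bodies.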
23083
23084 // Required for overloaded intrinsics.
23085 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
23086 switch (BuiltinID) {
23087 default: llvm_unreachable("unexpected builtin ID");
23088 case RISCV::BI__builtin_riscv_orc_b_32:
23089 case RISCV::BI__builtin_riscv_orc_b_64:
23090 case RISCV::BI__builtin_riscv_clmul_32:
23091 case RISCV::BI__builtin_riscv_clmul_64:
23092 case RISCV::BI__builtin_riscv_clmulh_32:
23093 case RISCV::BI__builtin_riscv_clmulh_64:
23094 case RISCV::BI__builtin_riscv_clmulr_32:
23095 case RISCV::BI__builtin_riscv_clmulr_64:
23096 case RISCV::BI__builtin_riscv_xperm4_32:
23097 case RISCV::BI__builtin_riscv_xperm4_64:
23098 case RISCV::BI__builtin_riscv_xperm8_32:
23099 case RISCV::BI__builtin_riscv_xperm8_64:
23100 case RISCV::BI__builtin_riscv_brev8_32:
23101 case RISCV::BI__builtin_riscv_brev8_64:
23102 case RISCV::BI__builtin_riscv_zip_32:
23103 case RISCV::BI__builtin_riscv_unzip_32: {
23104 switch (BuiltinID) {
23105 default: llvm_unreachable("unexpected builtin ID");
23106 // Zbb
23107 case RISCV::BI__builtin_riscv_orc_b_32:
23108 case RISCV::BI__builtin_riscv_orc_b_64:
23109 ID = Intrinsic::riscv_orc_b;
23110 break;
23111
23112 // Zbc
23113 case RISCV::BI__builtin_riscv_clmul_32:
23114 case RISCV::BI__builtin_riscv_clmul_64:
23115 ID = Intrinsic::riscv_clmul;
23116 break;
23117 case RISCV::BI__builtin_riscv_clmulh_32:
23118 case RISCV::BI__builtin_riscv_clmulh_64:
23119 ID = Intrinsic::riscv_clmulh;
23120 break;
23121 case RISCV::BI__builtin_riscv_clmulr_32:
23122 case RISCV::BI__builtin_riscv_clmulr_64:
23123 ID = Intrinsic::riscv_clmulr;
23124 break;
23125
23126 // Zbkx
23127 case RISCV::BI__builtin_riscv_xperm8_32:
23128 case RISCV::BI__builtin_riscv_xperm8_64:
23129 ID = Intrinsic::riscv_xperm8;
23130 break;
23131 case RISCV::BI__builtin_riscv_xperm4_32:
23132 case RISCV::BI__builtin_riscv_xperm4_64:
23133 ID = Intrinsic::riscv_xperm4;
23134 break;
23135
23136 // Zbkb
23137 case RISCV::BI__builtin_riscv_brev8_32:
23138 case RISCV::BI__builtin_riscv_brev8_64:
23139 ID = Intrinsic::riscv_brev8;
23140 break;
23141 case RISCV::BI__builtin_riscv_zip_32:
23142 ID = Intrinsic::riscv_zip;
23143 break;
23144 case RISCV::BI__builtin_riscv_unzip_32:
23145 ID = Intrinsic::riscv_unzip;
23146 break;
23147 }
23148
23149 IntrinsicTypes = {ResultType};
23150 break;
23151 }
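// Illustrative sketch (not from the original source): with the result type as
// the single overload parameter, a call such as
//   uint32_t R = __builtin_riscv_clmul_32(A, B);
// becomes roughly
//   %r = call i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
// and the _64 builtins pick the i64 overload of the same intrinsics.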
23152
23153 // Zk builtins
23154
23155 // Zknh
23156 case RISCV::BI__builtin_riscv_sha256sig0:
23157 ID = Intrinsic::riscv_sha256sig0;
23158 break;
23159 case RISCV::BI__builtin_riscv_sha256sig1:
23160 ID = Intrinsic::riscv_sha256sig1;
23161 break;
23162 case RISCV::BI__builtin_riscv_sha256sum0:
23163 ID = Intrinsic::riscv_sha256sum0;
23164 break;
23165 case RISCV::BI__builtin_riscv_sha256sum1:
23166 ID = Intrinsic::riscv_sha256sum1;
23167 break;
23168
23169 // Zksed
23170 case RISCV::BI__builtin_riscv_sm4ks:
23171 ID = Intrinsic::riscv_sm4ks;
23172 break;
23173 case RISCV::BI__builtin_riscv_sm4ed:
23174 ID = Intrinsic::riscv_sm4ed;
23175 break;
23176
23177 // Zksh
23178 case RISCV::BI__builtin_riscv_sm3p0:
23179 ID = Intrinsic::riscv_sm3p0;
23180 break;
23181 case RISCV::BI__builtin_riscv_sm3p1:
23182 ID = Intrinsic::riscv_sm3p1;
23183 break;
23184
23185 case RISCV::BI__builtin_riscv_clz_32:
23186 case RISCV::BI__builtin_riscv_clz_64: {
23187 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23188 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23189 if (Result->getType() != ResultType)
23190 Result =
23191 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23192 return Result;
23193 }
23194 case RISCV::BI__builtin_riscv_ctz_32:
23195 case RISCV::BI__builtin_riscv_ctz_64: {
23196 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23197 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23198 if (Result->getType() != ResultType)
23199 Result =
23200 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23201 return Result;
23202 }
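// Illustrative sketch (not from the original source): the clz/ctz builtins
// reuse the generic bit-counting intrinsics with "zero is not poison", e.g.
//   __builtin_riscv_clz_32(X)
// becomes roughly
//   %n = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
// followed by an integer cast only when the builtin's result type differs
// from the operand type.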
23203
23204 // Zihintntl
23205 case RISCV::BI__builtin_riscv_ntl_load: {
23206 llvm::Type *ResTy = ConvertType(E->getType());
23207 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23208 if (Ops.size() == 2)
23209 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23210
23211 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23212 getLLVMContext(),
23213 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23214 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23215 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23216
23217 int Width;
23218 if (ResTy->isScalableTy()) {
23219 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23220 llvm::Type *ScalarTy = ResTy->getScalarType();
23221 Width = ScalarTy->getPrimitiveSizeInBits() *
23222 SVTy->getElementCount().getKnownMinValue();
23223 } else
23224 Width = ResTy->getPrimitiveSizeInBits();
23225 LoadInst *Load = Builder.CreateLoad(
23226 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23227
23228 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23229 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23230 RISCVDomainNode);
23231
23232 return Load;
23233 }
23234 case RISCV::BI__builtin_riscv_ntl_store: {
23235 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23236 if (Ops.size() == 3)
23237 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23238
23239 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23240 getLLVMContext(),
23241 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23242 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23243 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23244
23245 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23246 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23247 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23248 RISCVDomainNode);
23249
23250 return Store;
23251 }
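// Illustrative sketch (domain value assumed, not from the original source):
// both non-temporal builtins tag the memory access with the generic
// !nontemporal flag plus a RISC-V specific domain node, e.g.
//   __builtin_riscv_ntl_store(Ptr, Val, 2);
// is emitted roughly as
//   store i32 %val, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
// with !0 = !{i32 1} and !1 = !{i32 2}; omitting the last operand selects the
// default domain 5 (__RISCV_NTLH_ALL).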
23252 // XCValu
23253 case RISCV::BI__builtin_riscv_cv_alu_addN:
23254 ID = Intrinsic::riscv_cv_alu_addN;
23255 break;
23256 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23257 ID = Intrinsic::riscv_cv_alu_addRN;
23258 break;
23259 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23260 ID = Intrinsic::riscv_cv_alu_adduN;
23261 break;
23262 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23263 ID = Intrinsic::riscv_cv_alu_adduRN;
23264 break;
23265 case RISCV::BI__builtin_riscv_cv_alu_clip:
23266 ID = Intrinsic::riscv_cv_alu_clip;
23267 break;
23268 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23269 ID = Intrinsic::riscv_cv_alu_clipu;
23270 break;
23271 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23272 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23273 "extbs");
23274 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23275 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23276 "extbz");
23277 case RISCV::BI__builtin_riscv_cv_alu_exths:
23278 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23279 "exths");
23280 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23281 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23282 "exthz");
23283 case RISCV::BI__builtin_riscv_cv_alu_slet:
23284 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23285 "sle");
23286 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23287 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23288 "sleu");
23289 case RISCV::BI__builtin_riscv_cv_alu_subN:
23290 ID = Intrinsic::riscv_cv_alu_subN;
23291 break;
23292 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23293 ID = Intrinsic::riscv_cv_alu_subRN;
23294 break;
23295 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23296 ID = Intrinsic::riscv_cv_alu_subuN;
23297 break;
23298 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23299 ID = Intrinsic::riscv_cv_alu_subuRN;
23300 break;
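// Illustrative sketch (not from the original source): the CORE-V extension
// builtins handled just above need no target intrinsic; e.g.
//   __builtin_riscv_cv_alu_extbs(X)
// is emitted as a truncate/extend pair, roughly
//   %t = trunc i32 %x to i8
//   %r = sext i8 %t to i32
// and slet/sletu become an icmp sle/ule whose i1 result is zero-extended to
// i32.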
23301
23302 // Vector builtins are handled from here.
23303#include "clang/Basic/riscv_vector_builtin_cg.inc"
23304
23305 // SiFive Vector builtins are handled from here.
23306#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23307 }
23308
23309 assert(ID != Intrinsic::not_intrinsic);
23310
23311 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23312 return Builder.CreateCall(F, Ops, "");
23313}
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3443
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8959
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9795
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:376
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1413
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6985
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:568
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2306
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2272
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6854
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2763
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9765
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:1028
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9758
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:8000
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9985
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8012
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7982
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:9027
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2641
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:621
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:966
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:671
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6981
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8013
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1610
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:8017
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
@ UnsignedAlts
Definition: CGBuiltin.cpp:6948
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6953
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6957
@ Use64BitVectors
Definition: CGBuiltin.cpp:6950
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6945
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6955
@ InventFloatType
Definition: CGBuiltin.cpp:6947
@ AddRetType
Definition: CGBuiltin.cpp:6940
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6942
@ VectorizeRetType
Definition: CGBuiltin.cpp:6944
@ VectorRet
Definition: CGBuiltin.cpp:6954
@ Add1ArgType
Definition: CGBuiltin.cpp:6941
@ Use128BitVectors
Definition: CGBuiltin.cpp:6951
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:1088
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:790
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2669
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:645
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:996
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9754
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8014
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9832
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6737
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2006
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7824
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6978
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:142
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:688
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:428
static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo)
Checks no arguments or results are passed indirectly in the ABI (i.e.
Definition: CGBuiltin.cpp:902
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition: CGBuiltin.cpp:249
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9859
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1847
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1696
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1474
static Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:765
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6990
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:868
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:859
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2499
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:1036
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1549
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9821
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:725
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:984
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2553
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2692
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6813
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:419
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9847
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:347
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8938
static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:826
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:78
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8930
@ VolatileRead
Definition: CGBuiltin.cpp:8932
@ NormalRead
Definition: CGBuiltin.cpp:8931
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:513
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:358
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2541
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:473
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:705
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:336
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9787
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8009
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7309
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:400
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:951
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:8075
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:778
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:808
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:657
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2774
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1423
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2507
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:744
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:913
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:212
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:411
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1459
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8857
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2266
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:632
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:101
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8011
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7584
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
unsigned Iter
Definition: HTMLLogger.cpp:153
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:144
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition: SemaHLSL.cpp:43
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
C Language Family Type Representation.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
APSInt & getInt()
Definition: APValue.h:465
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
IdentifierTable & Idents
Definition: ASTContext.h:680
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:682
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1169
QualType getObjCIdType() const
Represents the Objective-C id type.
Definition: ASTContext.h:2196
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2763
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2482
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2384
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2387
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
QualType getElementType() const
Definition: Type.h:3589
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition: SyncScope.h:273
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:150
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:248
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:252
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:123
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2874
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:858
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:898
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:915
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:389
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:381
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:405
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:398
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:219
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:437
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:346
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
MutableArrayRef< ArgInfo > arguments()
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:305
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID, bool NoMerge=false)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
llvm::Value * EmitCheckedArgForAssume(const Expr *E)
Emits an argument for a call to a __builtin_assume.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
llvm::Value * EmitRISCVCpuIs(const CallExpr *E)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
llvm::Type * ConvertType(QualType T)
void EmitWritebacks(const CallArgList &Args)
EmitWritebacks - Emit the writeback callbacks for the given call arguments.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
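A minimal sketch of how this helper might be invoked from inside a CodeGenFunction member, assuming Base, Index, ElemTy and Loc are already available (all names here are illustrative, not taken from this file):
  // Form &Base[Index] with an inbounds GEP plus, when the sanitizer is enabled,
  // a check that the signed index arithmetic does not overflow.
  llvm::Value *EltAddr =
      EmitCheckedInBoundsGEP(ElemTy, Base, {Index},
                             /*SignedIndices=*/true,
                             /*IsSubtraction=*/false, Loc, "elt.addr");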
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
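A minimal sketch of the usual read-modify-write pattern built on these two helpers, assuming it runs inside a CodeGenFunction member, Addr/Ty/Loc describe an already-computed scalar location, and Ty is plain 'int' (illustrative names only):
  // Load the current value, converting from its in-memory representation.
  llvm::Value *Old = EmitLoadOfScalar(Addr, /*Volatile=*/false, Ty, Loc);
  // Compute an updated value (assumes Ty lowers to i32).
  llvm::Value *NewVal = Builder.CreateAdd(Old, Builder.getInt32(1));
  // Store it back; the location is already initialized, so isInit stays false.
  EmitStoreOfScalar(NewVal, Addr, /*Volatile=*/false, Ty);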
llvm::Value * EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
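A minimal sketch of declaring and calling a runtime helper from a CodeGenFunction member; the name __example_runtime_fn is purely hypothetical and Ptr is an assumed, already-emitted pointer value:
  // Get or create "void __example_runtime_fn(ptr)" and call it.
  llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext());
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(Builder.getVoidTy(), {PtrTy}, /*isVarArg=*/false);
  llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__example_runtime_fn");
  EmitRuntimeCall(Fn, {Ptr});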
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2909
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:262
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
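A minimal sketch of how these two entry points are typically used when a builtin cannot be lowered, assuming E is the CallExpr being emitted inside a CodeGenFunction member whose result type is RValue:
  // Valid-but-unsupported construct: report and continue with an undef result.
  CGM.ErrorUnsupported(E, "builtin function");
  return GetUndefRValue(E->getType());
  // Alternatively, a hard error pinned to a specific source location:
  //   CGM.Error(E->getExprLoc(), "cannot lower this builtin call");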
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
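A minimal sketch of the common pattern in this file: fetch an (overloaded) intrinsic declaration and call it on an emitted argument. The choice of llvm.fabs is only an example; E is assumed to be the CallExpr, inside a CodeGenFunction member that returns the lowered llvm::Value*:
  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  // Overload llvm.fabs on the argument's IR type.
  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::fabs, Arg->getType());
  return Builder.CreateCall(F, Arg);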
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
llvm::Value * getAggregatePointer(QualType PointeeType, CodeGenFunction &CGF) const
Definition: CGValue.h:88
bool isScalar() const
Definition: CGValue.h:64
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
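A minimal, purely illustrative sketch of wrapping and unwrapping a scalar with this class, assuming Builder is in scope (as it is inside a CodeGenFunction member):
  RValue RV = RValue::get(Builder.getInt32(0));  // wrap a scalar llvm::Value
  llvm::Value *V = RV.isScalar() ? RV.getScalarVal() : nullptr;
  (void)V;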
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:386
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:170
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:160
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:142
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:179
Complex values, per C99 6.2.5p11.
Definition: Type.h:3145
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4232
Represents a sugar type with __counted_by or __sized_by annotations, including their _or_null variant...
Definition: Type.h:3306
DynamicCountPointerKind getKind() const
Definition: Type.h:3336
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2016
T * getAttr() const
Definition: DeclBase.h:576
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:596
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:246
DeclContext * getDeclContext()
Definition: DeclBase.h:451
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:432
bool hasAttr() const
Definition: DeclBase.h:580
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3095
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3090
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3086
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3587
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3070
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3963
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
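A minimal sketch of folding a call argument before emitting IR, assuming E is the CallExpr inside a CodeGenFunction member and that argument 1 is expected to be an integer constant expression:
  if (std::optional<llvm::APSInt> C =
          E->getArg(1)->getIntegerConstantExpr(getContext())) {
    uint64_t Imm = C->getZExtValue();
    (void)Imm; // e.g. select an immediate form of the target intrinsic
  }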
Represents difference between two FPOptions values.
Definition: LangOptions.h:978
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4701
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3638
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5102
@ SME_PStateSMEnabledMask
Definition: Type.h:4587
@ SME_PStateSMCompatibleMask
Definition: Type.h:4588
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5398
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:296
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition: Type.h:7780
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
QualType getPointeeType() const
Definition: Type.h:3208
A (possibly-)qualified type.
Definition: Type.h:929
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:8015
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2893
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8057
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2889
The collection of all-type qualifiers we support.
Definition: Type.h:324
Represents a struct/union/class.
Definition: Decl.h:4160
field_range fields() const
Definition: Decl.h:4366
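A minimal sketch of scanning a record's fields for one carrying a counted_by attribute; the helper name is hypothetical, and the real lookup in this file also handles nested records and flexible-array checks:
  static const FieldDecl *findAnyCountedByField(const RecordDecl *RD) {
    for (const FieldDecl *FD : RD->fields())
      if (const FieldDecl *CountFD = FD->findCountedByField())
        return CountFD; // the field named by the counted_by attribute
    return nullptr;
  }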
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool setsFPMR() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
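A minimal sketch of how such flags typically steer the lowering of an SVE builtin inside a CodeGenFunction member; the specific flag combination is illustrative only:
  if (TypeFlags.isGatherLoad() && TypeFlags.isZExtReturn()) {
    // A zero-extending gather loads a narrower element type from memory.
    llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
    (void)MemEltTy; // choose the matching intrinsic overload
  }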
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:345
bool isUnion() const
Definition: Decl.h:3782
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee? This is a fundamental language change,...
Definition: TargetCXXABI.h:188
Exposes information about the current target.
Definition: TargetInfo.h:220
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:311
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:697
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1262
bool isLittleEndian() const
Definition: TargetInfo.h:1673
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:861
bool isBigEndian() const
Definition: TargetInfo.h:1672
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1333
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1679
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:733
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
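A minimal sketch of querying the target from codegen; the conditions chosen here are illustrative:
  const TargetInfo &TI = CGM.getTarget();
  if (TI.isLittleEndian() && TI.getTriple().isAArch64()) {
    // e.g. take a little-endian AArch64-specific lowering path
  }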
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isBlockPointerType() const
Definition: Type.h:8200
bool isVoidType() const
Definition: Type.h:8510
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:710
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition: Type.cpp:2055
bool isArrayType() const
Definition: Type.h:8258
bool isCountAttributedType() const
Definition: Type.cpp:727
bool isPointerType() const
Definition: Type.h:8186
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8550
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8800
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8625
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2270
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2220
bool isObjCObjectPointerType() const
Definition: Type.h:8328
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2292
bool isVectorType() const
Definition: Type.h:8298
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:8731
bool isRecordType() const
Definition: Type.h:8286
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2513
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2581
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
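A minimal sketch of the kind of type inspection done before choosing a lowering, assuming ArgTy comes from one of the call's arguments inside a CodeGenFunction member:
  QualType ArgTy = E->getArg(0)->getType();
  if (const auto *PT = ArgTy->getAs<clang::PointerType>()) {
    QualType Pointee = PT->getPointeeType();
    (void)Pointee; // pointer-argument path
  } else if (ArgTy->isVectorType() || ArgTy->hasFloatingRepresentation()) {
    // vector / floating-point path
  }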
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4034
unsigned getNumElements() const
Definition: Type.h:4049
QualType getElementType() const
Definition: Type.h:4048
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
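A minimal sketch of how the layout computation feeds the os_log lowering, assuming E is the __builtin_os_log_format CallExpr inside a CodeGenFunction member:
  analyze_os_log::OSLogBufferLayout Layout;
  if (analyze_os_log::computeOSLogBufferLayout(getContext(), E, Layout)) {
    unsigned char Summary = Layout.getSummaryByte();
    unsigned char NumArgs = Layout.getNumArgsByte();
    (void)Summary; (void)NumArgs; // written into the buffer header
  }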
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: FixedPoint.h:19
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1212
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2355
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:447
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:726
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1695
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2128
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
The JSON file list parser is used to communicate input to InstallAPI.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
Definition: SyncScope.h:42
llvm::StringRef getAsString(SyncScope S)
Definition: SyncScope.h:60
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
int int32_t
unsigned long uint64_t
long int64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
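A minimal sketch of building a skip set, e.g. to feed the SkippedChecks parameter of EmitTypeCheck above; the chosen sanitizers are illustrative:
  SanitizerSet Skipped;
  Skipped.set(SanitizerKind::Null, true);      // null already checked elsewhere
  Skipped.set(SanitizerKind::Alignment, true); // alignment already guaranteed
  bool StillChecksVptr = !Skipped.has(SanitizerKind::Vptr);
  (void)StillChecksVptr;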
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742