1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CodeGenFunction.h"
21#include "CodeGenModule.h"
22#include "ConstantEmitter.h"
23#include "PatternInit.h"
24#include "TargetInfo.h"
26#include "clang/AST/Attr.h"
27#include "clang/AST/Decl.h"
28#include "clang/AST/OSLog.h"
35#include "llvm/ADT/APFloat.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/FloatingPointMode.h"
38#include "llvm/ADT/SmallPtrSet.h"
39#include "llvm/ADT/StringExtras.h"
40#include "llvm/Analysis/ValueTracking.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Intrinsics.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/IntrinsicsAMDGPU.h"
46#include "llvm/IR/IntrinsicsARM.h"
47#include "llvm/IR/IntrinsicsBPF.h"
48#include "llvm/IR/IntrinsicsDirectX.h"
49#include "llvm/IR/IntrinsicsHexagon.h"
50#include "llvm/IR/IntrinsicsNVPTX.h"
51#include "llvm/IR/IntrinsicsPowerPC.h"
52#include "llvm/IR/IntrinsicsR600.h"
53#include "llvm/IR/IntrinsicsRISCV.h"
54#include "llvm/IR/IntrinsicsS390.h"
55#include "llvm/IR/IntrinsicsVE.h"
56#include "llvm/IR/IntrinsicsWebAssembly.h"
57#include "llvm/IR/IntrinsicsX86.h"
58#include "llvm/IR/MDBuilder.h"
59#include "llvm/IR/MatrixBuilder.h"
60#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
61#include "llvm/Support/ConvertUTF.h"
62#include "llvm/Support/MathExtras.h"
63#include "llvm/Support/ScopedPrinter.h"
64#include "llvm/TargetParser/AArch64TargetParser.h"
65#include "llvm/TargetParser/RISCVISAInfo.h"
66#include "llvm/TargetParser/X86TargetParser.h"
67#include <optional>
68#include <sstream>
69
70using namespace clang;
71using namespace CodeGen;
72using namespace llvm;
73
74static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
75 Align AlignmentInBytes) {
76 ConstantInt *Byte;
77 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
78 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
79 // Nothing to initialize.
80 return;
81 case LangOptions::TrivialAutoVarInitKind::Zero:
82 Byte = CGF.Builder.getInt8(0x00);
83 break;
84 case LangOptions::TrivialAutoVarInitKind::Pattern: {
85 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
86 Byte = llvm::dyn_cast<llvm::ConstantInt>(
87 initializationPatternFor(CGF.CGM, Int8));
88 break;
89 }
90 }
91 if (CGF.CGM.stopAutoInit())
92 return;
93 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
94 I->addAnnotationMetadata("auto-init");
95}
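// Illustrative lowering (approximate): under -ftrivial-auto-var-init=zero, the
// alloca produced by __builtin_alloca is followed by something like
//   call void @llvm.memset.p0.i64(ptr %alloca, i8 0, i64 %size, i1 false)
// tagged with "auto-init" annotation metadata; =pattern uses the repeating
// byte chosen by initializationPatternFor (commonly 0xAA) instead of zero.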
96
97/// getBuiltinLibFunction - Given a builtin id for a function like
98/// "__builtin_fabsf", return a Function* for "fabsf".
99llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
100                                                     unsigned BuiltinID) {
101 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
102
103 // Get the name, skip over the __builtin_ prefix (if necessary).
104 StringRef Name;
105 GlobalDecl D(FD);
106
107 // TODO: This list should be expanded or refactored after all GCC-compatible
108 // std libcall builtins are implemented.
109 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
110 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
111 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
112 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
113 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
114 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
115 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
116 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
117 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
118 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
119 {Builtin::BI__builtin_printf, "__printfieee128"},
120 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
121 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
122 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
123 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
124 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
125 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
126 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
127 {Builtin::BI__builtin_scanf, "__scanfieee128"},
128 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
129 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
130 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
131 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
132 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
133 };
134
135 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
136 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
137 // if it is 64-bit 'long double' mode.
138 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
139 {Builtin::BI__builtin_frexpl, "frexp"},
140 {Builtin::BI__builtin_ldexpl, "ldexp"},
141 {Builtin::BI__builtin_modfl, "modf"},
142 };
143
144 // If the builtin has been declared explicitly with an assembler label,
145 // use the mangled name. This differs from the plain label on platforms
146 // that prefix labels.
147 if (FD->hasAttr<AsmLabelAttr>())
148 Name = getMangledName(D);
149 else {
150 // TODO: This mutation should also be applied to targets other than PPC,
151 // once the backend supports IEEE 128-bit style libcalls.
152 if (getTriple().isPPC64() &&
153 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
154 F128Builtins.contains(BuiltinID))
155 Name = F128Builtins[BuiltinID];
156 else if (getTriple().isOSAIX() &&
157 &getTarget().getLongDoubleFormat() ==
158 &llvm::APFloat::IEEEdouble() &&
159 AIXLongDouble64Builtins.contains(BuiltinID))
160 Name = AIXLongDouble64Builtins[BuiltinID];
161 else
162 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
163 }
164
165 llvm::FunctionType *Ty =
166 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
167
168 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
169}
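// Illustrative example: on a powerpc64le target whose 'long double' is IEEE
// quad (e.g. -mabi=ieeelongdouble), a call to __builtin_printf is emitted as a
// call to "__printfieee128" rather than "printf"; on other targets the
// "__builtin_" prefix is simply stripped, so __builtin_fabsf resolves to "fabsf".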
170
171/// Emit the conversions required to turn the given value into an
172/// integer of the given size.
173static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
174 QualType T, llvm::IntegerType *IntType) {
175 V = CGF.EmitToMemory(V, T);
176
177 if (V->getType()->isPointerTy())
178 return CGF.Builder.CreatePtrToInt(V, IntType);
179
180 assert(V->getType() == IntType);
181 return V;
182}
183
184static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
185 QualType T, llvm::Type *ResultType) {
186 V = CGF.EmitFromMemory(V, T);
187
188 if (ResultType->isPointerTy())
189 return CGF.Builder.CreateIntToPtr(V, ResultType);
190
191 assert(V->getType() == ResultType);
192 return V;
193}
194
195static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
196 ASTContext &Ctx = CGF.getContext();
197 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
198 unsigned Bytes = Ptr.getElementType()->isPointerTy()
199                      ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
200                      : Ptr.getElementType()->getScalarSizeInBits() / 8;
201 unsigned Align = Ptr.getAlignment().getQuantity();
202 if (Align % Bytes != 0) {
203 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
204 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
205 // Force address to be at least naturally-aligned.
206 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
207 }
208 return Ptr;
209}
210
211/// Utility to insert an atomic instruction based on Intrinsic::ID
212/// and the expression node.
213static Value *MakeBinaryAtomicValue(
214    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
215 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
216
217 QualType T = E->getType();
218 assert(E->getArg(0)->getType()->isPointerType());
219 assert(CGF.getContext().hasSameUnqualifiedType(T,
220                                  E->getArg(0)->getType()->getPointeeType()));
221 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
222
223 Address DestAddr = CheckAtomicAlignment(CGF, E);
224
225 llvm::IntegerType *IntType = llvm::IntegerType::get(
226 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
227
228 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
229 llvm::Type *ValueType = Val->getType();
230 Val = EmitToInt(CGF, Val, T, IntType);
231
232 llvm::Value *Result =
233 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
234 return EmitFromInt(CGF, Result, T, ValueType);
235}
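// Illustrative lowering (approximate): __sync_fetch_and_add(&i, v) reaches this
// helper with Kind == AtomicRMWInst::Add and becomes roughly
//   %old = atomicrmw add ptr %i, i32 %v seq_cst
// with pointer-typed operands round-tripped through ptrtoint/inttoptr as needed.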
236
237static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
238 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
239 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
240
241 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
242 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
243 LV.setNontemporal(true);
244 CGF.EmitStoreOfScalar(Val, LV, false);
245 return nullptr;
246}
247
248static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
249 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
250
251 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
252 LV.setNontemporal(true);
253 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
254}
255
256static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
257                               llvm::AtomicRMWInst::BinOp Kind,
258 const CallExpr *E) {
259 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
260}
261
262/// Utility to insert an atomic instruction based on Intrinsic::ID and
263/// the expression node, where the return value is the result of the
264/// operation.
265static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
266                                   llvm::AtomicRMWInst::BinOp Kind,
267 const CallExpr *E,
268 Instruction::BinaryOps Op,
269 bool Invert = false) {
270 QualType T = E->getType();
271 assert(E->getArg(0)->getType()->isPointerType());
272 assert(CGF.getContext().hasSameUnqualifiedType(T,
273                                  E->getArg(0)->getType()->getPointeeType()));
274 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
275
276 Address DestAddr = CheckAtomicAlignment(CGF, E);
277
278 llvm::IntegerType *IntType = llvm::IntegerType::get(
279 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
280
281 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
282 llvm::Type *ValueType = Val->getType();
283 Val = EmitToInt(CGF, Val, T, IntType);
284
285 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
286 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
287 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
288 if (Invert)
289 Result =
290 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
291 llvm::ConstantInt::getAllOnesValue(IntType));
292 Result = EmitFromInt(CGF, Result, T, ValueType);
293 return RValue::get(Result);
294}
295
296/// Utility to insert an atomic cmpxchg instruction.
297///
298/// @param CGF The current codegen function.
299/// @param E Builtin call expression to convert to cmpxchg.
300/// arg0 - address to operate on
301/// arg1 - value to compare with
302/// arg2 - new value
303/// @param ReturnBool Specifies whether to return success flag of
304/// cmpxchg result or the old value.
305///
306/// @returns result of cmpxchg, according to ReturnBool
307///
308/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
309/// invoke the function EmitAtomicCmpXchgForMSIntrin.
310static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
311                                     bool ReturnBool) {
312 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
313 Address DestAddr = CheckAtomicAlignment(CGF, E);
314
315 llvm::IntegerType *IntType = llvm::IntegerType::get(
316 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
317
318 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
319 llvm::Type *ValueType = Cmp->getType();
320 Cmp = EmitToInt(CGF, Cmp, T, IntType);
321 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
322
323 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
324     DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
325 llvm::AtomicOrdering::SequentiallyConsistent);
326 if (ReturnBool)
327 // Extract boolean success flag and zext it to int.
328 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
329 CGF.ConvertType(E->getType()));
330 else
331 // Extract old value and emit it using the same type as compare value.
332 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
333 ValueType);
334}
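// Illustrative lowering (approximate): both __sync_val_compare_and_swap and
// __sync_bool_compare_and_swap(&i, cmp, new) become roughly
//   %pair = cmpxchg ptr %i, i32 %cmp, i32 %new seq_cst seq_cst
// the former returns element 0 of the pair (the old value), the latter returns
// the zero-extended success flag (element 1).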
335
336/// This function should be invoked to emit atomic cmpxchg for Microsoft's
337/// _InterlockedCompareExchange* intrinsics which have the following signature:
338/// T _InterlockedCompareExchange(T volatile *Destination,
339/// T Exchange,
340/// T Comparand);
341///
342/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
343/// cmpxchg *Destination, Comparand, Exchange.
344/// So we need to swap Comparand and Exchange when invoking
345/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
346/// function MakeAtomicCmpXchgValue since it expects the arguments to be
347/// already swapped.
348
349static
350Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
351    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
352 assert(E->getArg(0)->getType()->isPointerType());
353 assert(CGF.getContext().hasSameUnqualifiedType(
354     E->getType(), E->getArg(0)->getType()->getPointeeType()));
355 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
356                                                E->getArg(1)->getType()));
357 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
358                                                E->getArg(2)->getType()));
359
360 Address DestAddr = CheckAtomicAlignment(CGF, E);
361
362 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
363 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
364
365 // For Release ordering, the failure ordering should be Monotonic.
366 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
367 AtomicOrdering::Monotonic :
368 SuccessOrdering;
369
370 // The atomic instruction is marked volatile for consistency with MSVC. This
371 // blocks the few atomics optimizations that LLVM has. If we want to optimize
372 // _Interlocked* operations in the future, we will have to remove the volatile
373 // marker.
374 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
375     DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
376 Result->setVolatile(true);
377 return CGF.Builder.CreateExtractValue(Result, 0);
378}
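// Illustrative example of the operand swap described above:
//   _InterlockedCompareExchange(&x, Exchange, Comparand)
// is emitted roughly as
//   %pair = cmpxchg volatile ptr %x, i32 %Comparand, i32 %Exchange seq_cst seq_cst
// and the old value (element 0 of the pair) is returned.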
379
380// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
381// prototyped like this:
382//
383// unsigned char _InterlockedCompareExchange128...(
384// __int64 volatile * _Destination,
385// __int64 _ExchangeHigh,
386// __int64 _ExchangeLow,
387// __int64 * _ComparandResult);
388//
389// Note that Destination is assumed to be at least 16-byte aligned, despite
390// being typed int64.
391
392static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
393                                              const CallExpr *E,
394 AtomicOrdering SuccessOrdering) {
395 assert(E->getNumArgs() == 4);
396 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
397 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
398 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
399 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
400
401 assert(DestPtr->getType()->isPointerTy());
402 assert(!ExchangeHigh->getType()->isPointerTy());
403 assert(!ExchangeLow->getType()->isPointerTy());
404
405 // For Release ordering, the failure ordering should be Monotonic.
406 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
407 ? AtomicOrdering::Monotonic
408 : SuccessOrdering;
409
410 // Convert to i128 pointers and values. Alignment is also overridden for
411 // destination pointer.
412 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
413 Address DestAddr(DestPtr, Int128Ty,
414                  CGF.getContext().toCharUnitsFromBits(128));
415 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
416
417 // (((i128)hi) << 64) | ((i128)lo)
418 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
419 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
420 ExchangeHigh =
421 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
422 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
423
424 // Load the comparand for the instruction.
425 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
426
427 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
428 SuccessOrdering, FailureOrdering);
429
430 // The atomic instruction is marked volatile for consistency with MSVC. This
431 // blocks the few atomics optimizations that LLVM has. If we want to optimize
432 // _Interlocked* operations in the future, we will have to remove the volatile
433 // marker.
434 CXI->setVolatile(true);
435
436 // Store the result as an outparameter.
437 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
438 ComparandAddr);
439
440 // Get the success boolean and zero extend it to i8.
441 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
442 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
443}
444
445static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
446    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
447 assert(E->getArg(0)->getType()->isPointerType());
448
449 auto *IntTy = CGF.ConvertType(E->getType());
450 Address DestAddr = CheckAtomicAlignment(CGF, E);
451 auto *Result = CGF.Builder.CreateAtomicRMW(
452 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
453 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
454}
455
456static Value *EmitAtomicDecrementValue(
457    CodeGenFunction &CGF, const CallExpr *E,
458 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
459 assert(E->getArg(0)->getType()->isPointerType());
460
461 auto *IntTy = CGF.ConvertType(E->getType());
462 Address DestAddr = CheckAtomicAlignment(CGF, E);
463 auto *Result = CGF.Builder.CreateAtomicRMW(
464 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
465 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
466}
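// Illustrative note: _InterlockedIncrement/_InterlockedDecrement return the
// *new* value, so the atomicrmw add/sub of 1 (which yields the old value) is
// followed by an extra add/sub of 1, e.g. roughly
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
//   %new = add i32 %old, 1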
467
468// Build a plain volatile load.
469static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
470 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
471 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
472 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
473 llvm::Type *ITy =
474 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
475 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
476 Load->setVolatile(true);
477 return Load;
478}
479
480// Build a plain volatile store.
481static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
482 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
483 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
484 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
485 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
486 llvm::StoreInst *Store =
487 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
488 Store->setVolatile(true);
489 return Store;
490}
491
492// Emit a simple mangled intrinsic that has 1 argument and a return type
493// matching the argument type. Depending on mode, this may be a constrained
494// floating-point intrinsic.
495static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
496                                const CallExpr *E, unsigned IntrinsicID,
497 unsigned ConstrainedIntrinsicID) {
498 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
499
500 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
501 if (CGF.Builder.getIsFPConstrained()) {
502 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
503 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
504 } else {
505 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
506 return CGF.Builder.CreateCall(F, Src0);
507 }
508}
509
510// Emit an intrinsic that has 2 operands of the same type as its result.
511// Depending on mode, this may be a constrained floating-point intrinsic.
512static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
513                               const CallExpr *E, unsigned IntrinsicID,
514 unsigned ConstrainedIntrinsicID) {
515 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
516 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
517
518 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
519 if (CGF.Builder.getIsFPConstrained()) {
520 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
521 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
522 } else {
523 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
524 return CGF.Builder.CreateCall(F, { Src0, Src1 });
525 }
526}
527
528// Emit an intrinsic whose name is additionally mangled on its second argument's type.
529static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
530    CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
531 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
532 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
533 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
534
535 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
536 if (CGF.Builder.getIsFPConstrained()) {
537 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
538 {Src0->getType(), Src1->getType()});
539 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
540 }
541
542 Function *F =
543 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
544 return CGF.Builder.CreateCall(F, {Src0, Src1});
545}
546
547// Emit an intrinsic that has 3 operands of the same type as its result.
548// Depending on mode, this may be a constrained floating-point intrinsic.
549static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
550                              const CallExpr *E, unsigned IntrinsicID,
551 unsigned ConstrainedIntrinsicID) {
552 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
553 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
554 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
555
556 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
557 if (CGF.Builder.getIsFPConstrained()) {
558 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
559 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
560 } else {
561 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
562 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
563 }
564}
565
566// Emit an intrinsic where all operands are of the same type as the result.
567// Depending on mode, this may be a constrained floating-point intrinsic.
568static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
569                                  unsigned IntrinsicID,
570 unsigned ConstrainedIntrinsicID,
571 llvm::Type *Ty,
572 ArrayRef<Value *> Args) {
573 Function *F;
574 if (CGF.Builder.getIsFPConstrained())
575 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
576 else
577 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
578
579 if (CGF.Builder.getIsFPConstrained())
580 return CGF.Builder.CreateConstrainedFPCall(F, Args);
581 else
582 return CGF.Builder.CreateCall(F, Args);
583}
584
585// Emit a simple intrinsic that has N scalar arguments and a return type
586// matching the argument type. It is assumed that only the first argument is
587// overloaded.
588template <unsigned N>
589static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E,
590                                               unsigned IntrinsicID,
591 llvm::StringRef Name = "") {
592 static_assert(N, "expect non-empty argument");
593 SmallVector<Value *, N> Args;
594 for (unsigned I = 0; I < N; ++I)
595 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
596 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
597 return CGF.Builder.CreateCall(F, Args, Name);
598}
599
600// Emit an intrinsic that has 1 float or double operand, and 1 integer.
601static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
602                               const CallExpr *E,
603 unsigned IntrinsicID) {
604 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
605 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
606
607 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
608 return CGF.Builder.CreateCall(F, {Src0, Src1});
609}
610
611// Emit an intrinsic that has overloaded integer result and fp operand.
612static Value *
613emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
614                                        unsigned IntrinsicID,
615 unsigned ConstrainedIntrinsicID) {
616 llvm::Type *ResultType = CGF.ConvertType(E->getType());
617 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
618
619 if (CGF.Builder.getIsFPConstrained()) {
620 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
621 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
622 {ResultType, Src0->getType()});
623 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
624 } else {
625 Function *F =
626 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
627 return CGF.Builder.CreateCall(F, Src0);
628 }
629}
630
631static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
632                               llvm::Intrinsic::ID IntrinsicID) {
633 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
634 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
635
636 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
637 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
638 llvm::Function *F =
639 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
640 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
641
642 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
643 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
644 CGF.EmitStoreOfScalar(Exp, LV);
645
646 return CGF.Builder.CreateExtractValue(Call, 0);
647}
648
649/// EmitFAbs - Emit a call to @llvm.fabs().
650static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
651 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
652 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
653 Call->setDoesNotAccessMemory();
654 return Call;
655}
656
657/// Emit the computation of the sign bit for a floating point value. Returns
658/// the i1 sign bit value.
659static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
660 LLVMContext &C = CGF.CGM.getLLVMContext();
661
662 llvm::Type *Ty = V->getType();
663 int Width = Ty->getPrimitiveSizeInBits();
664 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
665 V = CGF.Builder.CreateBitCast(V, IntTy);
666 if (Ty->isPPC_FP128Ty()) {
667 // We want the sign bit of the higher-order double. The bitcast we just
668 // did works as if the double-double was stored to memory and then
669 // read as an i128. The "store" will put the higher-order double in the
670 // lower address in both little- and big-Endian modes, but the "load"
671 // will treat those bits as a different part of the i128: the low bits in
672 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
673 // we need to shift the high bits down to the low before truncating.
674 Width >>= 1;
675 if (CGF.getTarget().isBigEndian()) {
676 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
677 V = CGF.Builder.CreateLShr(V, ShiftCst);
678 }
679 // We are truncating value in order to extract the higher-order
680 // double, which we will be using to extract the sign from.
681 IntTy = llvm::IntegerType::get(C, Width);
682 V = CGF.Builder.CreateTrunc(V, IntTy);
683 }
684 Value *Zero = llvm::Constant::getNullValue(IntTy);
685 return CGF.Builder.CreateICmpSLT(V, Zero);
686}
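// Illustrative example: for a plain 'double' this is simply
//   %bits = bitcast double %v to i64
//   %sign = icmp slt i64 %bits, 0
// the ppc_fp128 special case above only adjusts which half of the i128 the
// comparison looks at.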
687
688static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
689                              const CallExpr *E, llvm::Constant *calleeValue) {
690 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
691 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
692 RValue Call =
693 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
694
695 // Check the supported intrinsic.
696 if (unsigned BuiltinID = FD->getBuiltinID()) {
697 auto IsErrnoIntrinsic = [&]() -> unsigned {
698 switch (BuiltinID) {
699 case Builtin::BIexpf:
700 case Builtin::BI__builtin_expf:
701 case Builtin::BI__builtin_expf128:
702 return true;
703 }
704 // TODO: support more FP math libcalls
705 return false;
706 }();
707
708 // Restrict this to targets that set errno; for example, macOS does not.
709 if (IsErrnoIntrinsic && CGF.CGM.getLangOpts().MathErrno &&
710 !CGF.Builder.getIsFPConstrained()) {
711 ASTContext &Context = CGF.getContext();
712 // Emit "int" TBAA metadata on FP math libcalls.
713 clang::QualType IntTy = Context.IntTy;
714 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
715 Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal());
716 CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
717 }
718 }
719 return Call;
720}
721
722/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
723/// depending on IntrinsicID.
724///
725/// \arg CGF The current codegen function.
726/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
727/// \arg X The first argument to the llvm.*.with.overflow.*.
728/// \arg Y The second argument to the llvm.*.with.overflow.*.
729/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
730/// \returns The result (i.e. sum/product) returned by the intrinsic.
731static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
732 const llvm::Intrinsic::ID IntrinsicID,
733 llvm::Value *X, llvm::Value *Y,
734 llvm::Value *&Carry) {
735 // Make sure we have integers of the same width.
736 assert(X->getType() == Y->getType() &&
737 "Arguments must be the same type. (Did you forget to make sure both "
738 "arguments have the same integer width?)");
739
740 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
741 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
742 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
743 return CGF.Builder.CreateExtractValue(Tmp, 0);
744}
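// Illustrative use (approximate): __builtin_sadd_overflow(x, y, &res) lowers
// through this helper to
//   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// where element 0 is stored to res and element 1 is the carry/overflow flag
// returned through \p Carry.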
745
746static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
747 int low, int high) {
748 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
749 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
750 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
751 Call->addRangeRetAttr(CR);
752 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
753 return Call;
754}
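// Illustrative note: this is used for builtins whose result has a statically
// known range (for example GPU workitem-id builtins), attaching a half-open
// [low, high) range attribute and noundef to the call's return value.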
755
756namespace {
757 struct WidthAndSignedness {
758 unsigned Width;
759 bool Signed;
760 };
761}
762
763static WidthAndSignedness
764getIntegerWidthAndSignedness(const clang::ASTContext &context,
765                             const clang::QualType Type) {
766 assert(Type->isIntegerType() && "Given type is not an integer.");
767 unsigned Width = Type->isBooleanType() ? 1
768 : Type->isBitIntType() ? context.getIntWidth(Type)
769 : context.getTypeInfo(Type).Width;
770 bool Signed = Type->isSignedIntegerType();
771 return {Width, Signed};
772}
773
774// Given one or more integer types, this function produces an integer type that
775// encompasses them: any value in one of the given types could be expressed in
776// the encompassing type.
777static struct WidthAndSignedness
778EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
779 assert(Types.size() > 0 && "Empty list of types.");
780
781 // If any of the given types is signed, we must return a signed type.
782 bool Signed = false;
783 for (const auto &Type : Types) {
784 Signed |= Type.Signed;
785 }
786
787 // The encompassing type must have a width greater than or equal to the width
788 // of the specified types. Additionally, if the encompassing type is signed,
789 // its width must be strictly greater than the width of any unsigned types
790 // given.
791 unsigned Width = 0;
792 for (const auto &Type : Types) {
793 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
794 if (Width < MinWidth) {
795 Width = MinWidth;
796 }
797 }
798
799 return {Width, Signed};
800}
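// Worked example: for {width 32, unsigned} and {width 32, signed} the result
// must be signed, and the unsigned member then forces a minimum width of 33
// bits, so the encompassing type is a signed 33-bit integer.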
801
802Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
803 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
804 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
805 ArgValue);
806}
807
808/// Checks if using the result of __builtin_object_size(p, @p From) in place of
809/// __builtin_object_size(p, @p To) is correct
810static bool areBOSTypesCompatible(int From, int To) {
811 // Note: Our __builtin_object_size implementation currently treats Type=0 and
812 // Type=2 identically. Encoding this implementation detail here may make
813 // improving __builtin_object_size difficult in the future, so it's omitted.
814 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
815}
816
817static llvm::Value *
818getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
819 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
820}
821
822llvm::Value *
823CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
824 llvm::IntegerType *ResType,
825 llvm::Value *EmittedE,
826 bool IsDynamic) {
827 uint64_t ObjectSize;
828 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
829 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
830 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
831}
832
833const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
834    ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
835 uint64_t &Offset) {
836 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
837 getLangOpts().getStrictFlexArraysLevel();
838 uint32_t FieldNo = 0;
839
840 if (RD->isImplicit())
841 return nullptr;
842
843 for (const FieldDecl *FD : RD->fields()) {
844 if ((!FAMDecl || FD == FAMDecl) &&
845        Decl::isFlexibleArrayMemberLike(
846            Ctx, FD, FD->getType(), StrictFlexArraysLevel,
847 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
848 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
849 Offset += Layout.getFieldOffset(FieldNo);
850 return FD;
851 }
852
853 QualType Ty = FD->getType();
854 if (Ty->isRecordType()) {
855      if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
856              Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
857 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
858 Offset += Layout.getFieldOffset(FieldNo);
859 return Field;
860 }
861 }
862
863 if (!RD->isUnion())
864 ++FieldNo;
865 }
866
867 return nullptr;
868}
869
870static unsigned CountCountedByAttrs(const RecordDecl *RD) {
871 unsigned Num = 0;
872
873 for (const FieldDecl *FD : RD->fields()) {
874 if (FD->getType()->isCountAttributedType())
875 return ++Num;
876
877 QualType Ty = FD->getType();
878 if (Ty->isRecordType())
879      Num += CountCountedByAttrs(Ty->getAsRecordDecl());
880  }
881
882 return Num;
883}
884
885llvm::Value *
886CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
887 llvm::IntegerType *ResType) {
888 // The code generated here calculates the size of a struct with a flexible
889 // array member that uses the counted_by attribute. There are three cases
890 // we handle:
891 //
892 // struct s {
893 // unsigned long flags;
894 // int count;
895 // int array[] __attribute__((counted_by(count)));
896 // }
897 //
898 // 1) bdos of the flexible array itself:
899 //
900 // __builtin_dynamic_object_size(p->array, 1) ==
901 // p->count * sizeof(*p->array)
902 //
903 // 2) bdos of a pointer into the flexible array:
904 //
905 // __builtin_dynamic_object_size(&p->array[42], 1) ==
906 // (p->count - 42) * sizeof(*p->array)
907 //
908 //   3) bdos of the whole struct, including the flexible array:
909 //
910 // __builtin_dynamic_object_size(p, 1) ==
911 // max(sizeof(struct s),
912 // offsetof(struct s, array) + p->count * sizeof(*p->array))
913 //
914 ASTContext &Ctx = getContext();
915 const Expr *Base = E->IgnoreParenImpCasts();
916 const Expr *Idx = nullptr;
917
918 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
919 UO && UO->getOpcode() == UO_AddrOf) {
920 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
921 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
922 Base = ASE->getBase()->IgnoreParenImpCasts();
923 Idx = ASE->getIdx()->IgnoreParenImpCasts();
924
925 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
926 int64_t Val = IL->getValue().getSExtValue();
927 if (Val < 0)
928          return getDefaultBuiltinObjectSizeResult(Type, ResType);
929
930 if (Val == 0)
931 // The index is 0, so we don't need to take it into account.
932 Idx = nullptr;
933 }
934 } else {
935 // Potential pointer to another element in the struct.
936 Base = SubExpr;
937 }
938 }
939
940 // Get the flexible array member Decl.
941 const RecordDecl *OuterRD = nullptr;
942 const FieldDecl *FAMDecl = nullptr;
943 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
944 // Check if \p Base is referencing the FAM itself.
945 const ValueDecl *VD = ME->getMemberDecl();
946    OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
947    FAMDecl = dyn_cast<FieldDecl>(VD);
948 if (!FAMDecl)
949 return nullptr;
950 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
951 // Check if we're pointing to the whole struct.
952 QualType Ty = DRE->getDecl()->getType();
953 if (Ty->isPointerType())
954 Ty = Ty->getPointeeType();
955 OuterRD = Ty->getAsRecordDecl();
956
957 // If we have a situation like this:
958 //
959 // struct union_of_fams {
960 // int flags;
961 // union {
962 // signed char normal_field;
963 // struct {
964 // int count1;
965 // int arr1[] __counted_by(count1);
966 // };
967 // struct {
968 // signed char count2;
969 // int arr2[] __counted_by(count2);
970 // };
971 // };
972 // };
973 //
974 // We don't know which 'count' to use in this scenario:
975 //
976 // size_t get_size(struct union_of_fams *p) {
977 // return __builtin_dynamic_object_size(p, 1);
978 // }
979 //
980 // Instead of calculating a wrong number, we give up.
981 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
982 return nullptr;
983 }
984
985 if (!OuterRD)
986 return nullptr;
987
988 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
989 // get its offset.
990 uint64_t Offset = 0;
991 FAMDecl =
992 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
993 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
994
995 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
996 // No flexible array member found or it doesn't have the "counted_by"
997 // attribute.
998 return nullptr;
999
1000 const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
1001 if (!CountedByFD)
1002 // Can't find the field referenced by the "counted_by" attribute.
1003 return nullptr;
1004
1005 // Build a load of the counted_by field.
1006 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1007 Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
1008 if (!CountedByInst)
1009 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1010
1011 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1012
1013 // Build a load of the index and subtract it from the count.
1014 Value *IdxInst = nullptr;
1015 if (Idx) {
1016 if (Idx->HasSideEffects(getContext()))
1017 // We can't have side-effects.
1018 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1019
1020 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1021 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1022 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1023
1024 // We go ahead with the calculation here. If the index turns out to be
1025 // negative, we'll catch it at the end.
1026 CountedByInst =
1027 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1028 }
1029
1030 // Calculate how large the flexible array member is in bytes.
1031 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1032  CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1033  llvm::Constant *ElemSize =
1034 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1035 Value *FAMSize =
1036 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1037 FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1038 Value *Res = FAMSize;
1039
1040 if (isa<DeclRefExpr>(Base)) {
1041    // The whole struct is specified in the __bdos.
1042 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1043
1044 // Get the offset of the FAM.
1045 llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1046 Value *OffsetAndFAMSize =
1047 Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1048
1049 // Get the full size of the struct.
1050 llvm::Constant *SizeofStruct =
1051 ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1052
1053 // max(sizeof(struct s),
1054 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1055 Res = IsSigned
1056 ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1057 OffsetAndFAMSize, SizeofStruct)
1058 : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1059 OffsetAndFAMSize, SizeofStruct);
1060 }
1061
1062 // A negative \p IdxInst or \p CountedByInst means that the index lands
1063 // outside of the flexible array member. If that's the case, we want to
1064 // return 0.
1065 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1066 if (IdxInst)
1067 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1068
1069 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1070}
1071
1072/// Returns a Value corresponding to the size of the given expression.
1073/// This Value may be either of the following:
1074/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1075/// it)
1076/// - A call to the @llvm.objectsize intrinsic
1077///
1078/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1079/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1080/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1081llvm::Value *
1082CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1083 llvm::IntegerType *ResType,
1084 llvm::Value *EmittedE, bool IsDynamic) {
1085 // We need to reference an argument if the pointer is a parameter with the
1086 // pass_object_size attribute.
1087 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1088 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1089 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1090 if (Param != nullptr && PS != nullptr &&
1091 areBOSTypesCompatible(PS->getType(), Type)) {
1092 auto Iter = SizeArguments.find(Param);
1093 assert(Iter != SizeArguments.end());
1094
1095 const ImplicitParamDecl *D = Iter->second;
1096 auto DIter = LocalDeclMap.find(D);
1097 assert(DIter != LocalDeclMap.end());
1098
1099 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1100 getContext().getSizeType(), E->getBeginLoc());
1101 }
1102 }
1103
1104 if (IsDynamic) {
1105 // Emit special code for a flexible array member with the "counted_by"
1106 // attribute.
1107 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1108 return V;
1109 }
1110
1111 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1112 // evaluate E for side-effects. In either case, we shouldn't lower to
1113 // @llvm.objectsize.
1114 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1115 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1116
1117 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1118 assert(Ptr->getType()->isPointerTy() &&
1119 "Non-pointer passed to __builtin_object_size?");
1120
1121 Function *F =
1122 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1123
1124  // LLVM only supports 0 and 2, so make sure we pass that along as a boolean.
1125  Value *Min = Builder.getInt1((Type & 2) != 0);
1126  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1127 Value *NullIsUnknown = Builder.getTrue();
1128 Value *Dynamic = Builder.getInt1(IsDynamic);
1129 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1130}
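// Illustrative lowering (approximate, 64-bit size_t): __builtin_object_size(p, 2)
// becomes
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)
// (min=true because type 2 requests a lower bound, null-is-unknown=true for GCC
// compatibility, dynamic=false); __builtin_dynamic_object_size passes dynamic=true.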
1131
1132namespace {
1133/// A struct to generically describe a bit test intrinsic.
1134struct BitTest {
1135 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1136 enum InterlockingKind : uint8_t {
1137 Unlocked,
1138 Sequential,
1139 Acquire,
1140 Release,
1141 NoFence
1142 };
1143
1144 ActionKind Action;
1145 InterlockingKind Interlocking;
1146 bool Is64Bit;
1147
1148 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1149};
1150
1151} // namespace
1152
1153BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1154 switch (BuiltinID) {
1155 // Main portable variants.
1156 case Builtin::BI_bittest:
1157 return {TestOnly, Unlocked, false};
1158 case Builtin::BI_bittestandcomplement:
1159 return {Complement, Unlocked, false};
1160 case Builtin::BI_bittestandreset:
1161 return {Reset, Unlocked, false};
1162 case Builtin::BI_bittestandset:
1163 return {Set, Unlocked, false};
1164 case Builtin::BI_interlockedbittestandreset:
1165 return {Reset, Sequential, false};
1166 case Builtin::BI_interlockedbittestandset:
1167 return {Set, Sequential, false};
1168
1169 // X86-specific 64-bit variants.
1170 case Builtin::BI_bittest64:
1171 return {TestOnly, Unlocked, true};
1172 case Builtin::BI_bittestandcomplement64:
1173 return {Complement, Unlocked, true};
1174 case Builtin::BI_bittestandreset64:
1175 return {Reset, Unlocked, true};
1176 case Builtin::BI_bittestandset64:
1177 return {Set, Unlocked, true};
1178 case Builtin::BI_interlockedbittestandreset64:
1179 return {Reset, Sequential, true};
1180 case Builtin::BI_interlockedbittestandset64:
1181 return {Set, Sequential, true};
1182
1183 // ARM/AArch64-specific ordering variants.
1184 case Builtin::BI_interlockedbittestandset_acq:
1185 return {Set, Acquire, false};
1186 case Builtin::BI_interlockedbittestandset_rel:
1187 return {Set, Release, false};
1188 case Builtin::BI_interlockedbittestandset_nf:
1189 return {Set, NoFence, false};
1190 case Builtin::BI_interlockedbittestandreset_acq:
1191 return {Reset, Acquire, false};
1192 case Builtin::BI_interlockedbittestandreset_rel:
1193 return {Reset, Release, false};
1194 case Builtin::BI_interlockedbittestandreset_nf:
1195 return {Reset, NoFence, false};
1196 }
1197 llvm_unreachable("expected only bittest intrinsics");
1198}
1199
1200static char bitActionToX86BTCode(BitTest::ActionKind A) {
1201 switch (A) {
1202 case BitTest::TestOnly: return '\0';
1203 case BitTest::Complement: return 'c';
1204 case BitTest::Reset: return 'r';
1205 case BitTest::Set: return 's';
1206 }
1207 llvm_unreachable("invalid action");
1208}
1209
1210static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1211                                            BitTest BT,
1212 const CallExpr *E, Value *BitBase,
1213 Value *BitPos) {
1214 char Action = bitActionToX86BTCode(BT.Action);
1215 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1216
1217 // Build the assembly.
1218  SmallString<64> Asm;
1219  raw_svector_ostream AsmOS(Asm);
1220 if (BT.Interlocking != BitTest::Unlocked)
1221 AsmOS << "lock ";
1222 AsmOS << "bt";
1223 if (Action)
1224 AsmOS << Action;
1225 AsmOS << SizeSuffix << " $2, ($1)";
1226
1227 // Build the constraints. FIXME: We should support immediates when possible.
1228 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1229 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1230 if (!MachineClobbers.empty()) {
1231 Constraints += ',';
1232 Constraints += MachineClobbers;
1233 }
1234 llvm::IntegerType *IntType = llvm::IntegerType::get(
1235 CGF.getLLVMContext(),
1236 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1237 llvm::FunctionType *FTy =
1238 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1239
1240 llvm::InlineAsm *IA =
1241 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1242 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1243}
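// Illustrative output (approximate): _interlockedbittestandset64 produces the
// inline-asm string "lock btsq $2, ($1)" with constraints
// "={@ccc},r,r,~{cc},~{memory}" plus the target's machine clobbers, so the CF
// result comes back as an i8.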
1244
1245static llvm::AtomicOrdering
1246getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1247 switch (I) {
1248 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1249 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1250 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1251 case BitTest::Release: return llvm::AtomicOrdering::Release;
1252 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1253 }
1254 llvm_unreachable("invalid interlocking");
1255}
1256
1257/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1258/// bits and a bit position and read and optionally modify the bit at that
1259/// position. The position index can be arbitrarily large, i.e. it can be larger
1260/// than 31 or 63, so we need an indexed load in the general case.
1261static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1262 unsigned BuiltinID,
1263 const CallExpr *E) {
1264 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1265 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1266
1267 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1268
1269 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1270 // indexing operation internally. Use them if possible.
1271 if (CGF.getTarget().getTriple().isX86())
1272 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1273
1274 // Otherwise, use generic code to load one byte and test the bit. Use all but
1275 // the bottom three bits as the array index, and the bottom three bits to form
1276 // a mask.
1277 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1278 Value *ByteIndex = CGF.Builder.CreateAShr(
1279 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1280 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1281 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1282 ByteIndex, "bittest.byteaddr"),
1283 CGF.Int8Ty, CharUnits::One());
1284 Value *PosLow =
1285 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1286 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1287
1288 // The updating instructions will need a mask.
1289 Value *Mask = nullptr;
1290 if (BT.Action != BitTest::TestOnly) {
1291 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1292 "bittest.mask");
1293 }
1294
1295 // Check the action and ordering of the interlocked intrinsics.
1296 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1297
1298 Value *OldByte = nullptr;
1299 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1300 // Emit a combined atomicrmw load/store operation for the interlocked
1301 // intrinsics.
1302 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1303 if (BT.Action == BitTest::Reset) {
1304 Mask = CGF.Builder.CreateNot(Mask);
1305 RMWOp = llvm::AtomicRMWInst::And;
1306 }
1307 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1308 } else {
1309 // Emit a plain load for the non-interlocked intrinsics.
1310 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1311 Value *NewByte = nullptr;
1312 switch (BT.Action) {
1313 case BitTest::TestOnly:
1314 // Don't store anything.
1315 break;
1316 case BitTest::Complement:
1317 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1318 break;
1319 case BitTest::Reset:
1320 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1321 break;
1322 case BitTest::Set:
1323 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1324 break;
1325 }
1326 if (NewByte)
1327 CGF.Builder.CreateStore(NewByte, ByteAddr);
1328 }
1329
1330 // However we loaded the old byte, either by plain load or atomicrmw, shift
1331 // the bit into the low position and mask it to 0 or 1.
1332 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1333 return CGF.Builder.CreateAnd(
1334 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1335}
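// Worked example for the generic path: _bittestandset(base, 43) loads the byte
// at base[43 >> 3] == base[5], sets bit (43 & 7) == 3 using mask 1 << 3, and
// returns the previously loaded bit shifted down to bit position 0.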
1336
1337static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1338                                                unsigned BuiltinID,
1339 const CallExpr *E) {
1340 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1341
1342  SmallString<64> Asm;
1343  raw_svector_ostream AsmOS(Asm);
1344 llvm::IntegerType *RetType = CGF.Int32Ty;
1345
1346 switch (BuiltinID) {
1347 case clang::PPC::BI__builtin_ppc_ldarx:
1348 AsmOS << "ldarx ";
1349 RetType = CGF.Int64Ty;
1350 break;
1351 case clang::PPC::BI__builtin_ppc_lwarx:
1352 AsmOS << "lwarx ";
1353 RetType = CGF.Int32Ty;
1354 break;
1355 case clang::PPC::BI__builtin_ppc_lharx:
1356 AsmOS << "lharx ";
1357 RetType = CGF.Int16Ty;
1358 break;
1359 case clang::PPC::BI__builtin_ppc_lbarx:
1360 AsmOS << "lbarx ";
1361 RetType = CGF.Int8Ty;
1362 break;
1363 default:
1364 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1365 }
1366
1367 AsmOS << "$0, ${1:y}";
1368
1369 std::string Constraints = "=r,*Z,~{memory}";
1370 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1371 if (!MachineClobbers.empty()) {
1372 Constraints += ',';
1373 Constraints += MachineClobbers;
1374 }
1375
1376 llvm::Type *PtrType = CGF.UnqualPtrTy;
1377 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1378
1379 llvm::InlineAsm *IA =
1380 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1381 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1382 CI->addParamAttr(
1383 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1384 return CI;
1385}
1386
1387namespace {
1388enum class MSVCSetJmpKind {
1389 _setjmpex,
1390 _setjmp3,
1391 _setjmp
1392};
1393}
1394
1395/// MSVC handles setjmp a bit differently on different platforms. On every
1396/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1397/// parameters can be passed as variadic arguments, but we always pass none.
1398static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1399 const CallExpr *E) {
1400 llvm::Value *Arg1 = nullptr;
1401 llvm::Type *Arg1Ty = nullptr;
1402 StringRef Name;
1403 bool IsVarArg = false;
1404 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1405 Name = "_setjmp3";
1406 Arg1Ty = CGF.Int32Ty;
1407 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1408 IsVarArg = true;
1409 } else {
1410 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1411 Arg1Ty = CGF.Int8PtrTy;
1412 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1413 Arg1 = CGF.Builder.CreateCall(
1414 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1415 } else
1416 Arg1 = CGF.Builder.CreateCall(
1417 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1418 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1419 }
1420
1421 // Mark the call site and declaration with ReturnsTwice.
1422 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1423 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1424 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1425 llvm::Attribute::ReturnsTwice);
1426 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1427 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1428 ReturnsTwiceAttr, /*Local=*/true);
1429
1430 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1431 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1432 llvm::Value *Args[] = {Buf, Arg1};
1433 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1434 CB->setAttributes(ReturnsTwiceAttr);
1435 return RValue::get(CB);
1436}
1437
1438// Many of the MSVC builtins are on x64, ARM and AArch64; to avoid repeating
1439// code, we handle them here.
1440enum class CodeGenFunction::MSVCIntrin {
 1479  __fastfail,
1480};
1481
1482static std::optional<CodeGenFunction::MSVCIntrin>
1483translateArmToMsvcIntrin(unsigned BuiltinID) {
1484 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1485 switch (BuiltinID) {
1486 default:
1487 return std::nullopt;
1488 case clang::ARM::BI_BitScanForward:
1489 case clang::ARM::BI_BitScanForward64:
1490 return MSVCIntrin::_BitScanForward;
1491 case clang::ARM::BI_BitScanReverse:
1492 case clang::ARM::BI_BitScanReverse64:
1493 return MSVCIntrin::_BitScanReverse;
1494 case clang::ARM::BI_InterlockedAnd64:
1495 return MSVCIntrin::_InterlockedAnd;
1496 case clang::ARM::BI_InterlockedExchange64:
1497 return MSVCIntrin::_InterlockedExchange;
1498 case clang::ARM::BI_InterlockedExchangeAdd64:
1499 return MSVCIntrin::_InterlockedExchangeAdd;
1500 case clang::ARM::BI_InterlockedExchangeSub64:
1501 return MSVCIntrin::_InterlockedExchangeSub;
1502 case clang::ARM::BI_InterlockedOr64:
1503 return MSVCIntrin::_InterlockedOr;
1504 case clang::ARM::BI_InterlockedXor64:
1505 return MSVCIntrin::_InterlockedXor;
1506 case clang::ARM::BI_InterlockedDecrement64:
1507 return MSVCIntrin::_InterlockedDecrement;
1508 case clang::ARM::BI_InterlockedIncrement64:
1509 return MSVCIntrin::_InterlockedIncrement;
1510 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1511 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1512 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1513 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1514 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1515 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1516 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1517 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1518 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1519 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1520 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1521 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1522 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1523 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1524 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1525 case clang::ARM::BI_InterlockedExchange8_acq:
1526 case clang::ARM::BI_InterlockedExchange16_acq:
1527 case clang::ARM::BI_InterlockedExchange_acq:
1528 case clang::ARM::BI_InterlockedExchange64_acq:
1529 return MSVCIntrin::_InterlockedExchange_acq;
1530 case clang::ARM::BI_InterlockedExchange8_rel:
1531 case clang::ARM::BI_InterlockedExchange16_rel:
1532 case clang::ARM::BI_InterlockedExchange_rel:
1533 case clang::ARM::BI_InterlockedExchange64_rel:
1534 return MSVCIntrin::_InterlockedExchange_rel;
1535 case clang::ARM::BI_InterlockedExchange8_nf:
1536 case clang::ARM::BI_InterlockedExchange16_nf:
1537 case clang::ARM::BI_InterlockedExchange_nf:
1538 case clang::ARM::BI_InterlockedExchange64_nf:
1539 return MSVCIntrin::_InterlockedExchange_nf;
1540 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1541 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1542 case clang::ARM::BI_InterlockedCompareExchange_acq:
1543 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1544 return MSVCIntrin::_InterlockedCompareExchange_acq;
1545 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1546 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1547 case clang::ARM::BI_InterlockedCompareExchange_rel:
1548 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1549 return MSVCIntrin::_InterlockedCompareExchange_rel;
1550 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1551 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1552 case clang::ARM::BI_InterlockedCompareExchange_nf:
1553 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1554 return MSVCIntrin::_InterlockedCompareExchange_nf;
1555 case clang::ARM::BI_InterlockedOr8_acq:
1556 case clang::ARM::BI_InterlockedOr16_acq:
1557 case clang::ARM::BI_InterlockedOr_acq:
1558 case clang::ARM::BI_InterlockedOr64_acq:
1559 return MSVCIntrin::_InterlockedOr_acq;
1560 case clang::ARM::BI_InterlockedOr8_rel:
1561 case clang::ARM::BI_InterlockedOr16_rel:
1562 case clang::ARM::BI_InterlockedOr_rel:
1563 case clang::ARM::BI_InterlockedOr64_rel:
1564 return MSVCIntrin::_InterlockedOr_rel;
1565 case clang::ARM::BI_InterlockedOr8_nf:
1566 case clang::ARM::BI_InterlockedOr16_nf:
1567 case clang::ARM::BI_InterlockedOr_nf:
1568 case clang::ARM::BI_InterlockedOr64_nf:
1569 return MSVCIntrin::_InterlockedOr_nf;
1570 case clang::ARM::BI_InterlockedXor8_acq:
1571 case clang::ARM::BI_InterlockedXor16_acq:
1572 case clang::ARM::BI_InterlockedXor_acq:
1573 case clang::ARM::BI_InterlockedXor64_acq:
1574 return MSVCIntrin::_InterlockedXor_acq;
1575 case clang::ARM::BI_InterlockedXor8_rel:
1576 case clang::ARM::BI_InterlockedXor16_rel:
1577 case clang::ARM::BI_InterlockedXor_rel:
1578 case clang::ARM::BI_InterlockedXor64_rel:
1579 return MSVCIntrin::_InterlockedXor_rel;
1580 case clang::ARM::BI_InterlockedXor8_nf:
1581 case clang::ARM::BI_InterlockedXor16_nf:
1582 case clang::ARM::BI_InterlockedXor_nf:
1583 case clang::ARM::BI_InterlockedXor64_nf:
1584 return MSVCIntrin::_InterlockedXor_nf;
1585 case clang::ARM::BI_InterlockedAnd8_acq:
1586 case clang::ARM::BI_InterlockedAnd16_acq:
1587 case clang::ARM::BI_InterlockedAnd_acq:
1588 case clang::ARM::BI_InterlockedAnd64_acq:
1589 return MSVCIntrin::_InterlockedAnd_acq;
1590 case clang::ARM::BI_InterlockedAnd8_rel:
1591 case clang::ARM::BI_InterlockedAnd16_rel:
1592 case clang::ARM::BI_InterlockedAnd_rel:
1593 case clang::ARM::BI_InterlockedAnd64_rel:
1594 return MSVCIntrin::_InterlockedAnd_rel;
1595 case clang::ARM::BI_InterlockedAnd8_nf:
1596 case clang::ARM::BI_InterlockedAnd16_nf:
1597 case clang::ARM::BI_InterlockedAnd_nf:
1598 case clang::ARM::BI_InterlockedAnd64_nf:
1599 return MSVCIntrin::_InterlockedAnd_nf;
1600 case clang::ARM::BI_InterlockedIncrement16_acq:
1601 case clang::ARM::BI_InterlockedIncrement_acq:
1602 case clang::ARM::BI_InterlockedIncrement64_acq:
1603 return MSVCIntrin::_InterlockedIncrement_acq;
1604 case clang::ARM::BI_InterlockedIncrement16_rel:
1605 case clang::ARM::BI_InterlockedIncrement_rel:
1606 case clang::ARM::BI_InterlockedIncrement64_rel:
1607 return MSVCIntrin::_InterlockedIncrement_rel;
1608 case clang::ARM::BI_InterlockedIncrement16_nf:
1609 case clang::ARM::BI_InterlockedIncrement_nf:
1610 case clang::ARM::BI_InterlockedIncrement64_nf:
1611 return MSVCIntrin::_InterlockedIncrement_nf;
1612 case clang::ARM::BI_InterlockedDecrement16_acq:
1613 case clang::ARM::BI_InterlockedDecrement_acq:
1614 case clang::ARM::BI_InterlockedDecrement64_acq:
1615 return MSVCIntrin::_InterlockedDecrement_acq;
1616 case clang::ARM::BI_InterlockedDecrement16_rel:
1617 case clang::ARM::BI_InterlockedDecrement_rel:
1618 case clang::ARM::BI_InterlockedDecrement64_rel:
1619 return MSVCIntrin::_InterlockedDecrement_rel;
1620 case clang::ARM::BI_InterlockedDecrement16_nf:
1621 case clang::ARM::BI_InterlockedDecrement_nf:
1622 case clang::ARM::BI_InterlockedDecrement64_nf:
1623 return MSVCIntrin::_InterlockedDecrement_nf;
1624 }
1625 llvm_unreachable("must return from switch");
1626}
1627
1628static std::optional<CodeGenFunction::MSVCIntrin>
1629translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1630 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1631 switch (BuiltinID) {
1632 default:
1633 return std::nullopt;
1634 case clang::AArch64::BI_BitScanForward:
1635 case clang::AArch64::BI_BitScanForward64:
1636 return MSVCIntrin::_BitScanForward;
1637 case clang::AArch64::BI_BitScanReverse:
1638 case clang::AArch64::BI_BitScanReverse64:
1639 return MSVCIntrin::_BitScanReverse;
1640 case clang::AArch64::BI_InterlockedAnd64:
1641 return MSVCIntrin::_InterlockedAnd;
1642 case clang::AArch64::BI_InterlockedExchange64:
1643 return MSVCIntrin::_InterlockedExchange;
1644 case clang::AArch64::BI_InterlockedExchangeAdd64:
1645 return MSVCIntrin::_InterlockedExchangeAdd;
1646 case clang::AArch64::BI_InterlockedExchangeSub64:
1647 return MSVCIntrin::_InterlockedExchangeSub;
1648 case clang::AArch64::BI_InterlockedOr64:
1649 return MSVCIntrin::_InterlockedOr;
1650 case clang::AArch64::BI_InterlockedXor64:
1651 return MSVCIntrin::_InterlockedXor;
1652 case clang::AArch64::BI_InterlockedDecrement64:
1653 return MSVCIntrin::_InterlockedDecrement;
1654 case clang::AArch64::BI_InterlockedIncrement64:
1655 return MSVCIntrin::_InterlockedIncrement;
1656 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1657 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1658 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1659 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1660 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1661 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1662 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1663 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1664 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1665 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1666 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1667 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1668 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1669 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1670 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1671 case clang::AArch64::BI_InterlockedExchange8_acq:
1672 case clang::AArch64::BI_InterlockedExchange16_acq:
1673 case clang::AArch64::BI_InterlockedExchange_acq:
1674 case clang::AArch64::BI_InterlockedExchange64_acq:
1675 return MSVCIntrin::_InterlockedExchange_acq;
1676 case clang::AArch64::BI_InterlockedExchange8_rel:
1677 case clang::AArch64::BI_InterlockedExchange16_rel:
1678 case clang::AArch64::BI_InterlockedExchange_rel:
1679 case clang::AArch64::BI_InterlockedExchange64_rel:
1680 return MSVCIntrin::_InterlockedExchange_rel;
1681 case clang::AArch64::BI_InterlockedExchange8_nf:
1682 case clang::AArch64::BI_InterlockedExchange16_nf:
1683 case clang::AArch64::BI_InterlockedExchange_nf:
1684 case clang::AArch64::BI_InterlockedExchange64_nf:
1685 return MSVCIntrin::_InterlockedExchange_nf;
1686 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1687 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1688 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1689 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1690 return MSVCIntrin::_InterlockedCompareExchange_acq;
1691 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1692 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1693 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1694 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1695 return MSVCIntrin::_InterlockedCompareExchange_rel;
1696 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1697 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1698 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1699 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1700 return MSVCIntrin::_InterlockedCompareExchange_nf;
1701 case clang::AArch64::BI_InterlockedCompareExchange128:
1702 return MSVCIntrin::_InterlockedCompareExchange128;
1703 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1704 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1705 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1706 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1707 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1708 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1709 case clang::AArch64::BI_InterlockedOr8_acq:
1710 case clang::AArch64::BI_InterlockedOr16_acq:
1711 case clang::AArch64::BI_InterlockedOr_acq:
1712 case clang::AArch64::BI_InterlockedOr64_acq:
1713 return MSVCIntrin::_InterlockedOr_acq;
1714 case clang::AArch64::BI_InterlockedOr8_rel:
1715 case clang::AArch64::BI_InterlockedOr16_rel:
1716 case clang::AArch64::BI_InterlockedOr_rel:
1717 case clang::AArch64::BI_InterlockedOr64_rel:
1718 return MSVCIntrin::_InterlockedOr_rel;
1719 case clang::AArch64::BI_InterlockedOr8_nf:
1720 case clang::AArch64::BI_InterlockedOr16_nf:
1721 case clang::AArch64::BI_InterlockedOr_nf:
1722 case clang::AArch64::BI_InterlockedOr64_nf:
1723 return MSVCIntrin::_InterlockedOr_nf;
1724 case clang::AArch64::BI_InterlockedXor8_acq:
1725 case clang::AArch64::BI_InterlockedXor16_acq:
1726 case clang::AArch64::BI_InterlockedXor_acq:
1727 case clang::AArch64::BI_InterlockedXor64_acq:
1728 return MSVCIntrin::_InterlockedXor_acq;
1729 case clang::AArch64::BI_InterlockedXor8_rel:
1730 case clang::AArch64::BI_InterlockedXor16_rel:
1731 case clang::AArch64::BI_InterlockedXor_rel:
1732 case clang::AArch64::BI_InterlockedXor64_rel:
1733 return MSVCIntrin::_InterlockedXor_rel;
1734 case clang::AArch64::BI_InterlockedXor8_nf:
1735 case clang::AArch64::BI_InterlockedXor16_nf:
1736 case clang::AArch64::BI_InterlockedXor_nf:
1737 case clang::AArch64::BI_InterlockedXor64_nf:
1738 return MSVCIntrin::_InterlockedXor_nf;
1739 case clang::AArch64::BI_InterlockedAnd8_acq:
1740 case clang::AArch64::BI_InterlockedAnd16_acq:
1741 case clang::AArch64::BI_InterlockedAnd_acq:
1742 case clang::AArch64::BI_InterlockedAnd64_acq:
1743 return MSVCIntrin::_InterlockedAnd_acq;
1744 case clang::AArch64::BI_InterlockedAnd8_rel:
1745 case clang::AArch64::BI_InterlockedAnd16_rel:
1746 case clang::AArch64::BI_InterlockedAnd_rel:
1747 case clang::AArch64::BI_InterlockedAnd64_rel:
1748 return MSVCIntrin::_InterlockedAnd_rel;
1749 case clang::AArch64::BI_InterlockedAnd8_nf:
1750 case clang::AArch64::BI_InterlockedAnd16_nf:
1751 case clang::AArch64::BI_InterlockedAnd_nf:
1752 case clang::AArch64::BI_InterlockedAnd64_nf:
1753 return MSVCIntrin::_InterlockedAnd_nf;
1754 case clang::AArch64::BI_InterlockedIncrement16_acq:
1755 case clang::AArch64::BI_InterlockedIncrement_acq:
1756 case clang::AArch64::BI_InterlockedIncrement64_acq:
1757 return MSVCIntrin::_InterlockedIncrement_acq;
1758 case clang::AArch64::BI_InterlockedIncrement16_rel:
1759 case clang::AArch64::BI_InterlockedIncrement_rel:
1760 case clang::AArch64::BI_InterlockedIncrement64_rel:
1761 return MSVCIntrin::_InterlockedIncrement_rel;
1762 case clang::AArch64::BI_InterlockedIncrement16_nf:
1763 case clang::AArch64::BI_InterlockedIncrement_nf:
1764 case clang::AArch64::BI_InterlockedIncrement64_nf:
1765 return MSVCIntrin::_InterlockedIncrement_nf;
1766 case clang::AArch64::BI_InterlockedDecrement16_acq:
1767 case clang::AArch64::BI_InterlockedDecrement_acq:
1768 case clang::AArch64::BI_InterlockedDecrement64_acq:
1769 return MSVCIntrin::_InterlockedDecrement_acq;
1770 case clang::AArch64::BI_InterlockedDecrement16_rel:
1771 case clang::AArch64::BI_InterlockedDecrement_rel:
1772 case clang::AArch64::BI_InterlockedDecrement64_rel:
1773 return MSVCIntrin::_InterlockedDecrement_rel;
1774 case clang::AArch64::BI_InterlockedDecrement16_nf:
1775 case clang::AArch64::BI_InterlockedDecrement_nf:
1776 case clang::AArch64::BI_InterlockedDecrement64_nf:
1777 return MSVCIntrin::_InterlockedDecrement_nf;
1778 }
1779 llvm_unreachable("must return from switch");
1780}
1781
1782static std::optional<CodeGenFunction::MSVCIntrin>
1783translateX86ToMsvcIntrin(unsigned BuiltinID) {
1784 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1785 switch (BuiltinID) {
1786 default:
1787 return std::nullopt;
1788 case clang::X86::BI_BitScanForward:
1789 case clang::X86::BI_BitScanForward64:
1790 return MSVCIntrin::_BitScanForward;
1791 case clang::X86::BI_BitScanReverse:
1792 case clang::X86::BI_BitScanReverse64:
1793 return MSVCIntrin::_BitScanReverse;
1794 case clang::X86::BI_InterlockedAnd64:
1795 return MSVCIntrin::_InterlockedAnd;
1796 case clang::X86::BI_InterlockedCompareExchange128:
1797 return MSVCIntrin::_InterlockedCompareExchange128;
1798 case clang::X86::BI_InterlockedExchange64:
1799 return MSVCIntrin::_InterlockedExchange;
1800 case clang::X86::BI_InterlockedExchangeAdd64:
1801 return MSVCIntrin::_InterlockedExchangeAdd;
1802 case clang::X86::BI_InterlockedExchangeSub64:
1803 return MSVCIntrin::_InterlockedExchangeSub;
1804 case clang::X86::BI_InterlockedOr64:
1805 return MSVCIntrin::_InterlockedOr;
1806 case clang::X86::BI_InterlockedXor64:
1807 return MSVCIntrin::_InterlockedXor;
1808 case clang::X86::BI_InterlockedDecrement64:
1809 return MSVCIntrin::_InterlockedDecrement;
1810 case clang::X86::BI_InterlockedIncrement64:
1811 return MSVCIntrin::_InterlockedIncrement;
1812 }
1813 llvm_unreachable("must return from switch");
1814}
1815
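Editorial aside: the translate*ToMsvcIntrin helpers above only map per-target builtin IDs onto the shared MSVCIntrin enumeration; the actual lowering happens once, in EmitMSVCBuiltinExpr below. A minimal sketch of source that exercises the _acq-suffixed cases, assuming an AArch64 Windows target with MSVC intrinsics available (illustrative only, not part of this file):

    #include <intrin.h>

    volatile __int64 Counter;

    __int64 AddOneAcquire(void) {
      // Recognized as clang::AArch64::BI_InterlockedExchangeAdd64_acq, mapped to
      // MSVCIntrin::_InterlockedExchangeAdd_acq, and emitted below as an
      // 'atomicrmw add ... acquire'.
      return _InterlockedExchangeAdd64_acq(&Counter, 1);
    }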
1816// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1817Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1818 const CallExpr *E) {
1819 switch (BuiltinID) {
1820 case MSVCIntrin::_BitScanForward:
1821 case MSVCIntrin::_BitScanReverse: {
1822 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1823 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1824
1825 llvm::Type *ArgType = ArgValue->getType();
1826 llvm::Type *IndexType = IndexAddress.getElementType();
1827 llvm::Type *ResultType = ConvertType(E->getType());
1828
1829 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1830 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1831 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1832
1833 BasicBlock *Begin = Builder.GetInsertBlock();
1834 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1835 Builder.SetInsertPoint(End);
1836 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1837
1838 Builder.SetInsertPoint(Begin);
1839 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1840 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1841 Builder.CreateCondBr(IsZero, End, NotZero);
1842 Result->addIncoming(ResZero, Begin);
1843
1844 Builder.SetInsertPoint(NotZero);
1845
1846 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1847 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1848 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1849 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1850 Builder.CreateStore(ZeroCount, IndexAddress, false);
1851 } else {
1852 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1853 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1854
1855 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1856 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1857 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1858 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1859 Builder.CreateStore(Index, IndexAddress, false);
1860 }
1861 Builder.CreateBr(End);
1862 Result->addIncoming(ResOne, NotZero);
1863
1864 Builder.SetInsertPoint(End);
1865 return Result;
1866 }
1867 case MSVCIntrin::_InterlockedAnd:
1868 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1869 case MSVCIntrin::_InterlockedExchange:
1870 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1871 case MSVCIntrin::_InterlockedExchangeAdd:
1872 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1873 case MSVCIntrin::_InterlockedExchangeSub:
1874 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1875 case MSVCIntrin::_InterlockedOr:
1876 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1877 case MSVCIntrin::_InterlockedXor:
1878 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1879 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1880 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1881 AtomicOrdering::Acquire);
1882 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1883 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1884 AtomicOrdering::Release);
1885 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1886 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1887 AtomicOrdering::Monotonic);
1888 case MSVCIntrin::_InterlockedExchange_acq:
1889 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1890 AtomicOrdering::Acquire);
1891 case MSVCIntrin::_InterlockedExchange_rel:
1892 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1893 AtomicOrdering::Release);
1894 case MSVCIntrin::_InterlockedExchange_nf:
1895 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1896 AtomicOrdering::Monotonic);
1897 case MSVCIntrin::_InterlockedCompareExchange_acq:
1898 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1899 case MSVCIntrin::_InterlockedCompareExchange_rel:
1900 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1901 case MSVCIntrin::_InterlockedCompareExchange_nf:
1902 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1903 case MSVCIntrin::_InterlockedCompareExchange128:
1904 return EmitAtomicCmpXchg128ForMSIntrin(
1905 *this, E, AtomicOrdering::SequentiallyConsistent);
1906 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1907 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1908 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1909 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1910 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1911 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1912 case MSVCIntrin::_InterlockedOr_acq:
1913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1914 AtomicOrdering::Acquire);
1915 case MSVCIntrin::_InterlockedOr_rel:
1916 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1917 AtomicOrdering::Release);
1918 case MSVCIntrin::_InterlockedOr_nf:
1919 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1920 AtomicOrdering::Monotonic);
1921 case MSVCIntrin::_InterlockedXor_acq:
1922 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1923 AtomicOrdering::Acquire);
1924 case MSVCIntrin::_InterlockedXor_rel:
1925 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1926 AtomicOrdering::Release);
1927 case MSVCIntrin::_InterlockedXor_nf:
1928 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1929 AtomicOrdering::Monotonic);
1930 case MSVCIntrin::_InterlockedAnd_acq:
1931 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1932 AtomicOrdering::Acquire);
1933 case MSVCIntrin::_InterlockedAnd_rel:
1934 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1935 AtomicOrdering::Release);
1936 case MSVCIntrin::_InterlockedAnd_nf:
1937 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1938 AtomicOrdering::Monotonic);
1939 case MSVCIntrin::_InterlockedIncrement_acq:
1940 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1941 case MSVCIntrin::_InterlockedIncrement_rel:
1942 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1943 case MSVCIntrin::_InterlockedIncrement_nf:
1944 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1945 case MSVCIntrin::_InterlockedDecrement_acq:
1946 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1947 case MSVCIntrin::_InterlockedDecrement_rel:
1948 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1949 case MSVCIntrin::_InterlockedDecrement_nf:
1950 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1951
1952 case MSVCIntrin::_InterlockedDecrement:
1953 return EmitAtomicDecrementValue(*this, E);
1954 case MSVCIntrin::_InterlockedIncrement:
1955 return EmitAtomicIncrementValue(*this, E);
1956
1957 case MSVCIntrin::__fastfail: {
1958 // Request immediate process termination from the kernel. The instruction
1959 // sequences to do this are documented on MSDN:
1960 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1961 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1962 StringRef Asm, Constraints;
1963 switch (ISA) {
1964 default:
1965 ErrorUnsupported(E, "__fastfail call for this architecture");
1966 break;
1967 case llvm::Triple::x86:
1968 case llvm::Triple::x86_64:
1969 Asm = "int $$0x29";
1970 Constraints = "{cx}";
1971 break;
1972 case llvm::Triple::thumb:
1973 Asm = "udf #251";
1974 Constraints = "{r0}";
1975 break;
1976 case llvm::Triple::aarch64:
1977 Asm = "brk #0xF003";
1978 Constraints = "{w0}";
1979 }
1980 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1981 llvm::InlineAsm *IA =
1982 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1983 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1984 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1985 llvm::Attribute::NoReturn);
1986 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1987 CI->setAttributes(NoReturnAttr);
1988 return CI;
1989 }
1990 }
1991 llvm_unreachable("Incorrect MSVC intrinsic!");
1992}
1993
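Editorial aside: the _BitScanForward/_BitScanReverse block above builds a small two-block diamond around llvm.cttz/llvm.ctlz. A rough C-level equivalent of what the emitted IR computes for _BitScanForward on a 32-bit mask (a sketch, not part of the source):

    unsigned char BitScanForwardSketch(unsigned *Index, unsigned Mask) {
      if (Mask == 0)
        return 0;                              // the result PHI takes ResZero on this path
      *Index = (unsigned)__builtin_ctz(Mask);  // cttz with the zero-is-poison flag set
      return 1;                                // the result PHI takes ResOne on this path
    }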
1994namespace {
1995// ARC cleanup for __builtin_os_log_format
1996struct CallObjCArcUse final : EHScopeStack::Cleanup {
1997 CallObjCArcUse(llvm::Value *object) : object(object) {}
1998 llvm::Value *object;
1999
2000 void Emit(CodeGenFunction &CGF, Flags flags) override {
2001 CGF.EmitARCIntrinsicUse(object);
2002 }
2003};
2004}
2005
2006 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2007 BuiltinCheckKind Kind) {
2008 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2009 && "Unsupported builtin check kind");
2010
2011 Value *ArgValue = EmitScalarExpr(E);
2012 if (!SanOpts.has(SanitizerKind::Builtin))
2013 return ArgValue;
2014
2015 SanitizerScope SanScope(this);
2016 Value *Cond = Builder.CreateICmpNE(
2017 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2018 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2019 SanitizerHandler::InvalidBuiltin,
2020 {EmitCheckSourceLocation(E->getExprLoc()),
2021 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2022 std::nullopt);
2023 return ArgValue;
2024}
2025
2026static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2027 return CGF.Builder.CreateBinaryIntrinsic(
2028 Intrinsic::abs, ArgValue,
2029 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2030}
2031
2032 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2033 bool SanitizeOverflow) {
2034 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2035
2036 // Try to eliminate overflow check.
2037 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2038 if (!VCI->isMinSignedValue())
2039 return EmitAbs(CGF, ArgValue, true);
2040 }
2041
2042 CodeGenFunction::SanitizerScope SanScope(&CGF);
2043
2044 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2045 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2046 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2047 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2048 Value *NotOverflow = CGF.Builder.CreateNot(
2049 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2050
2051 // TODO: support -ftrapv-handler.
2052 if (SanitizeOverflow) {
2053 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2054 SanitizerHandler::NegateOverflow,
2055 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2056 CGF.EmitCheckTypeDescriptor(E->getType())},
2057 {ArgValue});
2058 } else
2059 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2060
2061 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2062 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2063}
2064
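Editorial sketch of what EmitOverflowCheckedAbs above emits, at the C level: |x| is computed as 0 - x through llvm.ssub.with.overflow, which flags exactly the one input (INT_MIN) whose absolute value is not representable (illustrative only):

    int CheckedAbsSketch(int X) {
      int Negated;
      // Overflows only when X == INT_MIN.
      if (__builtin_sub_overflow(0, X, &Negated))
        __builtin_trap();            // or the UBSan negate-overflow handler when sanitizing
      return X < 0 ? Negated : X;    // matches the final CreateSelect on "abscond"
    }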
2065/// Get the argument type for arguments to os_log_helper.
2066 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2067 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2068 return C.getCanonicalType(UnsignedTy);
2069}
2070
2071 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2072 const analyze_os_log::OSLogBufferLayout &Layout,
2073 CharUnits BufferAlignment) {
2074 ASTContext &Ctx = getContext();
2075
2076 llvm::SmallString<64> Name;
2077 {
2078 raw_svector_ostream OS(Name);
2079 OS << "__os_log_helper";
2080 OS << "_" << BufferAlignment.getQuantity();
2081 OS << "_" << int(Layout.getSummaryByte());
2082 OS << "_" << int(Layout.getNumArgsByte());
2083 for (const auto &Item : Layout.Items)
2084 OS << "_" << int(Item.getSizeByte()) << "_"
2085 << int(Item.getDescriptorByte());
2086 }
2087
2088 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2089 return F;
2090
2091 llvm::SmallVector<QualType, 4> ArgTys;
2092 FunctionArgList Args;
2093 Args.push_back(ImplicitParamDecl::Create(
2094 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2095 ImplicitParamKind::Other));
2096 ArgTys.emplace_back(Ctx.VoidPtrTy);
2097
2098 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2099 char Size = Layout.Items[I].getSizeByte();
2100 if (!Size)
2101 continue;
2102
2103 QualType ArgTy = getOSLogArgType(Ctx, Size);
2104 Args.push_back(ImplicitParamDecl::Create(
2105 Ctx, nullptr, SourceLocation(),
2106 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2107 ImplicitParamKind::Other));
2108 ArgTys.emplace_back(ArgTy);
2109 }
2110
2111 QualType ReturnTy = Ctx.VoidTy;
2112
2113 // The helper function has linkonce_odr linkage to enable the linker to merge
2114 // identical functions. To ensure the merging always happens, 'noinline' is
2115 // attached to the function when compiling with -Oz.
2116 const CGFunctionInfo &FI =
2117 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2118 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2119 llvm::Function *Fn = llvm::Function::Create(
2120 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2121 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2122 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2123 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2124 Fn->setDoesNotThrow();
2125
2126 // Attach 'noinline' at -Oz.
2127 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2128 Fn->addFnAttr(llvm::Attribute::NoInline);
2129
2130 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2131 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2132
2133 // Create a scope with an artificial location for the body of this function.
2134 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2135
2136 CharUnits Offset;
2137 Address BufAddr = makeNaturalAddressForPointer(
2138 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2139 BufferAlignment);
2140 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2141 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2142 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2143 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2144
2145 unsigned I = 1;
2146 for (const auto &Item : Layout.Items) {
2147 Builder.CreateStore(
2148 Builder.getInt8(Item.getDescriptorByte()),
2149 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2150 Builder.CreateStore(
2151 Builder.getInt8(Item.getSizeByte()),
2152 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2153
2154 CharUnits Size = Item.size();
2155 if (!Size.getQuantity())
2156 continue;
2157
2158 Address Arg = GetAddrOfLocalVar(Args[I]);
2159 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2160 Addr = Addr.withElementType(Arg.getElementType());
2161 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2162 Offset += Size;
2163 ++I;
2164 }
2165
2166 FinishFunction();
2167
2168 return Fn;
2169}
2170
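For orientation (editorial note, not from the source): the helper generated above serializes its arguments into the os_log buffer in a fixed byte layout, which the stores over 'Offset' implement: one summary byte, one argument-count byte, then per item a descriptor byte, a size byte, and the raw argument bytes. A hedged sketch of that framing, with illustrative names:

    // Byte layout produced by __os_log_helper_* (names below are illustrative):
    //   Buf[0]           summary        (Layout.getSummaryByte())
    //   Buf[1]           argument count (Layout.getNumArgsByte())
    //   then, per item:  descriptor byte, size byte, 'size' bytes of argument data
    struct OSLogItemHeaderSketch {
      unsigned char Descriptor; // Item.getDescriptorByte()
      unsigned char Size;       // Item.getSizeByte()
      // followed by Size bytes of argument payload
    };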
2171 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2172 assert(E.getNumArgs() >= 2 &&
2173 "__builtin_os_log_format takes at least 2 arguments");
2174 ASTContext &Ctx = getContext();
2175 analyze_os_log::OSLogBufferLayout Layout;
2176 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2177 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2178 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2179
2180 // Ignore argument 1, the format string. It is not currently used.
2181 CallArgList Args;
2182 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2183
2184 for (const auto &Item : Layout.Items) {
2185 int Size = Item.getSizeByte();
2186 if (!Size)
2187 continue;
2188
2189 llvm::Value *ArgVal;
2190
2191 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2192 uint64_t Val = 0;
2193 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2194 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2195 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2196 } else if (const Expr *TheExpr = Item.getExpr()) {
2197 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2198
2199 // If a temporary object that requires destruction after the full
2200 // expression is passed, push a lifetime-extended cleanup to extend its
2201 // lifetime to the end of the enclosing block scope.
2202 auto LifetimeExtendObject = [&](const Expr *E) {
2203 E = E->IgnoreParenCasts();
2204 // Extend lifetimes of objects returned by function calls and message
2205 // sends.
2206
2207 // FIXME: We should do this in other cases in which temporaries are
2208 // created including arguments of non-ARC types (e.g., C++
2209 // temporaries).
2210 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2211 return true;
2212 return false;
2213 };
2214
2215 if (TheExpr->getType()->isObjCRetainableType() &&
2216 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2217 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2218 "Only scalar can be a ObjC retainable type");
2219 if (!isa<Constant>(ArgVal)) {
2220 CleanupKind Cleanup = getARCCleanupKind();
2221 QualType Ty = TheExpr->getType();
2222 RawAddress Alloca = RawAddress::invalid();
2223 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2224 ArgVal = EmitARCRetain(Ty, ArgVal);
2225 Builder.CreateStore(ArgVal, Addr);
2226 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2227 CodeGenFunction::destroyARCStrongPrecise,
2228 Cleanup & EHCleanup);
2229
2230 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2231 // argument has to be alive.
2232 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2233 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2234 }
2235 }
2236 } else {
2237 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2238 }
2239
2240 unsigned ArgValSize =
2241 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2242 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2243 ArgValSize);
2244 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2245 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2246 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2247 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2248 Args.add(RValue::get(ArgVal), ArgTy);
2249 }
2250
2251 const CGFunctionInfo &FI =
2252 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2253 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2254 Layout, BufAddr.getAlignment());
2255 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2256 return RValue::get(BufAddr, *this);
2257}
2258
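A typical call site for the code above, shown only as an editorial illustration: the first argument is the destination buffer, the second is the format string (skipped by the loop above), and each remaining argument becomes one buffer item:

    void LogSketch(int I, const char *S) {
      char Buf[__builtin_os_log_format_buffer_size("%d %s", I, S)];
      __builtin_os_log_format(Buf, "%d %s", I, S); // lowered through emitBuiltinOSLogFormat
    }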
2259 static bool isSpecialUnsignedMultiplySignedResult(
2260 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2261 WidthAndSignedness ResultInfo) {
2262 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2263 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2264 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2265}
2266
2267 static RValue EmitCheckedUnsignedMultiplySignedResult(
2268 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2269 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2270 const clang::Expr *ResultArg, QualType ResultQTy,
2271 WidthAndSignedness ResultInfo) {
2272 assert(isSpecialUnsignedMultiplySignedResult(
2273 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2274 "Cannot specialize this multiply");
2275
2276 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2277 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2278
2279 llvm::Value *HasOverflow;
2280 llvm::Value *Result = EmitOverflowIntrinsic(
2281 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2282
2283 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2284 // however, since the original builtin had a signed result, we need to report
2285 // an overflow when the result is greater than INT_MAX.
2286 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2287 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2288
2289 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2290 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2291
2292 bool isVolatile =
2293 ResultArg->getType()->getPointeeType().isVolatileQualified();
2294 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2295 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2296 isVolatile);
2297 return RValue::get(HasOverflow);
2298}
2299
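Editorial sketch of the specialization above for __builtin_mul_overflow with two unsigned operands and a signed result of the same width: the multiply is done as umul.with.overflow and a second check rejects products above INT_MAX (illustrative only; assumes <climits> for INT_MAX):

    #include <climits>

    bool UnsignedMulSignedResultSketch(unsigned A, unsigned B, int *Res) {
      unsigned Prod;
      bool Overflow = __builtin_mul_overflow(A, B, &Prod); // umul.with.overflow on the operands
      Overflow |= Prod > (unsigned)INT_MAX;                // the signed result must also fit
      *Res = (int)Prod;                                    // flagged above if this loses value
      return Overflow;
    }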
2300/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2301static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2302 WidthAndSignedness Op1Info,
2303 WidthAndSignedness Op2Info,
2304 WidthAndSignedness ResultInfo) {
2305 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2306 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2307 Op1Info.Signed != Op2Info.Signed;
2308}
2309
2310/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2311/// the generic checked-binop irgen.
2312static RValue
2313 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2314 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2315 WidthAndSignedness Op2Info,
2316 const clang::Expr *ResultArg, QualType ResultQTy,
2317 WidthAndSignedness ResultInfo) {
2318 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2319 Op2Info, ResultInfo) &&
2320 "Not a mixed-sign multiplication we can specialize");
2321
2322 // Emit the signed and unsigned operands.
2323 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2324 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2325 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2326 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2327 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2328 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2329
2330 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2331 if (SignedOpWidth < UnsignedOpWidth)
2332 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2333 if (UnsignedOpWidth < SignedOpWidth)
2334 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2335
2336 llvm::Type *OpTy = Signed->getType();
2337 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2338 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2339 llvm::Type *ResTy = ResultPtr.getElementType();
2340 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2341
2342 // Take the absolute value of the signed operand.
2343 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2344 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2345 llvm::Value *AbsSigned =
2346 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2347
2348 // Perform a checked unsigned multiplication.
2349 llvm::Value *UnsignedOverflow;
2350 llvm::Value *UnsignedResult =
2351 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2352 Unsigned, UnsignedOverflow);
2353
2354 llvm::Value *Overflow, *Result;
2355 if (ResultInfo.Signed) {
2356 // Signed overflow occurs if the result is greater than INT_MAX or less
2357 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2358 auto IntMax =
2359 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2360 llvm::Value *MaxResult =
2361 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2362 CGF.Builder.CreateZExt(IsNegative, OpTy));
2363 llvm::Value *SignedOverflow =
2364 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2365 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2366
2367 // Prepare the signed result (possibly by negating it).
2368 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2369 llvm::Value *SignedResult =
2370 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2371 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2372 } else {
2373 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2374 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2375 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2376 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2377 if (ResultInfo.Width < OpWidth) {
2378 auto IntMax =
2379 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2380 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2381 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2382 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2383 }
2384
2385 // Negate the product if it would be negative in infinite precision.
2386 Result = CGF.Builder.CreateSelect(
2387 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2388
2389 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2390 }
2391 assert(Overflow && Result && "Missing overflow or result");
2392
2393 bool isVolatile =
2394 ResultArg->getType()->getPointeeType().isVolatileQualified();
2395 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2396 isVolatile);
2397 return RValue::get(Overflow);
2398}
2399
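Editorial sketch of the mixed-sign path above: the signed operand is split into sign and magnitude, the product is formed with umul.with.overflow, and it is negated back when the signed operand was negative; the extra check allows one more magnitude (INT_MIN) on the negative side. This sketch assumes two's-complement wrapping on the final unsigned-to-signed conversion and <climits> for INT_MAX (illustrative only):

    #include <climits>

    bool MixedSignMulSketch(int S, unsigned U, int *Res) {
      bool Neg = S < 0;
      unsigned AbsS = Neg ? 0u - (unsigned)S : (unsigned)S;   // |S| without signed overflow
      unsigned Prod;
      bool Overflow = __builtin_mul_overflow(AbsS, U, &Prod); // umul.with.overflow
      Overflow |= Prod > (unsigned)INT_MAX + (Neg ? 1u : 0u); // INT_MIN is allowed when negative
      *Res = (int)(Neg ? 0u - Prod : Prod);                   // negate back in the unsigned domain
      return Overflow;
    }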
2400static bool
2401 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2402 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2403 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2404 Ty = Ctx.getBaseElementType(Arr);
2405
2406 const auto *Record = Ty->getAsCXXRecordDecl();
2407 if (!Record)
2408 return false;
2409
2410 // We've already checked this type, or are in the process of checking it.
2411 if (!Seen.insert(Record).second)
2412 return false;
2413
2414 assert(Record->hasDefinition() &&
2415 "Incomplete types should already be diagnosed");
2416
2417 if (Record->isDynamicClass())
2418 return true;
2419
2420 for (FieldDecl *F : Record->fields()) {
2421 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2422 return true;
2423 }
2424 return false;
2425}
2426
2427/// Determine if the specified type requires laundering by checking if it is a
2428/// dynamic class type or contains a subobject which is a dynamic class type.
2429 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2430 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2431 return false;
2432 llvm::SmallPtrSet<const Decl *, 16> Seen;
2433 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2434}
2435
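Editorial illustration of what TypeRequiresBuiltinLaunder above is checking for: under -fstrict-vtable-pointers, only types that have (or contain) a vtable pointer need __builtin_launder after their storage is reused (the type names below are illustrative):

    struct Dyn   { virtual ~Dyn(); int X; }; // dynamic class: requires laundering
    struct Holds { Dyn D; };                 // contains a dynamic subobject: requires laundering
    struct Plain { int X; };                 // no vptr anywhere: laundering is not required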
2436RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2437 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2438 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2439
2440 // The builtin's shift arg may have a different type than the source arg and
2441 // result, but the LLVM intrinsic uses the same type for all values.
2442 llvm::Type *Ty = Src->getType();
2443 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2444
2445 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2446 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2447 Function *F = CGM.getIntrinsic(IID, Ty);
2448 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2449}
2450
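Editorial note on the mapping above: a rotate is a funnel shift with both data operands equal, so __builtin_rotateleft32(x, n) becomes llvm.fshl.i32(x, x, n). For a 32-bit unsigned value that is the classic branch-free expression below (a sketch assuming 32-bit unsigned):

    unsigned RotateLeft32Sketch(unsigned X, unsigned N) {
      return (X << (N & 31)) | (X >> ((32 - N) & 31)); // well-defined for any N, including 0
    }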
2451// Map math builtins for long-double to f128 version.
2452static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2453 switch (BuiltinID) {
2454#define MUTATE_LDBL(func) \
2455 case Builtin::BI__builtin_##func##l: \
2456 return Builtin::BI__builtin_##func##f128;
2487 MUTATE_LDBL(nans)
2488 MUTATE_LDBL(inf)
2507 MUTATE_LDBL(huge_val)
2517#undef MUTATE_LDBL
2518 default:
2519 return BuiltinID;
2520 }
2521}
2522
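Editorial illustration for the mapping above, assuming a PPC64 target where long double is IEEE binary128 (e.g. -mabi=ieeelongdouble):

    long double SinQuadSketch(long double X) {
      // In EmitBuiltinExpr below, BI__builtin_sinl is remapped by
      // mutateLongDoubleBuiltin to BI__builtin_sinf128, so the f128 lowering is used.
      return __builtin_sinl(X);
    }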
2523static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2524 Value *V) {
2525 if (CGF.Builder.getIsFPConstrained() &&
2526 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2527 if (Value *Result =
2528 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2529 return Result;
2530 }
2531 return nullptr;
2532}
2533
2534 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2535 const FunctionDecl *FD) {
2536 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2537 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2538 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2539
2540 SmallVector<Value *, 16> Args;
2541 for (auto &&FormalTy : FnTy->params())
2542 Args.push_back(llvm::PoisonValue::get(FormalTy));
2543
2544 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2545}
2546
2547RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2548 const CallExpr *E,
2549 ReturnValueSlot ReturnValue) {
2550 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2551 // See if we can constant fold this builtin. If so, don't emit it at all.
2552 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2553 Expr::EvalResult Result;
2554 if (E->isPRValue() && E->EvaluateAsRValue(&Result, CGM.getContext()) &&
2555 !Result.hasSideEffects()) {
2556 if (Result.Val.isInt())
2557 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2558 Result.Val.getInt()));
2559 if (Result.Val.isFloat())
2560 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2561 Result.Val.getFloat()));
2562 }
2563
2564 // If current long-double semantics is IEEE 128-bit, replace math builtins
2565 // of long-double with f128 equivalent.
2566 // TODO: This mutation should also be applied to targets other than PPC,
2567 // after backend supports IEEE 128-bit style libcalls.
2568 if (getTarget().getTriple().isPPC64() &&
2569 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2570 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2571
2572 // If the builtin has been declared explicitly with an assembler label,
2573 // disable the specialized emitting below. Ideally we should communicate the
2574 // rename in IR, or at least avoid generating the intrinsic calls that are
2575 // likely to get lowered to the renamed library functions.
2576 const unsigned BuiltinIDIfNoAsmLabel =
2577 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2578
2579 std::optional<bool> ErrnoOverriden;
2580 // ErrnoOverriden is true if math-errno is overridden via the
2581 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2582 // which implies math-errno.
2583 if (E->hasStoredFPFeatures()) {
2584 FPOptionsOverride OP = E->getFPFeatures();
2585 if (OP.hasMathErrnoOverride())
2586 ErrnoOverriden = OP.getMathErrnoOverride();
2587 }
2588 // True if '__attribute__((optnone))' is used. This attribute overrides
2589 // fast-math, which implies math-errno.
2590 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2591
2592 // True if we are compiling with optimization and errno has been disabled
2593 // via '#pragma float_control(precise, off)', and
2594 // '__attribute__((optnone))' hasn't been seen.
2595 bool ErrnoOverridenToFalseWithOpt =
2596 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2597 CGM.getCodeGenOpts().OptimizationLevel != 0;
2598
2599 // There are LLVM math intrinsics/instructions corresponding to math library
2600 // functions, except that the LLVM op will never set errno while the math library
2601 // might. Also, math builtins have the same semantics as their math library
2602 // twins. Thus, we can transform math library and builtin calls to their
2603 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2604 // In case FP exceptions are enabled, the experimental versions of the
2605 // intrinsics model those.
2606 bool ConstAlways =
2607 getContext().BuiltinInfo.isConst(BuiltinID);
2608
2609 // There's a special case with the fma builtins where they are always const
2610 // if the target environment is GNU or the target OS is Windows and we're
2611 // targeting the MSVCRT.dll environment.
2612 // FIXME: This list can become outdated. Need to find a way to get it some
2613 // other way.
2614 switch (BuiltinID) {
2615 case Builtin::BI__builtin_fma:
2616 case Builtin::BI__builtin_fmaf:
2617 case Builtin::BI__builtin_fmal:
2618 case Builtin::BI__builtin_fmaf16:
2619 case Builtin::BIfma:
2620 case Builtin::BIfmaf:
2621 case Builtin::BIfmal: {
2622 auto &Trip = CGM.getTriple();
2623 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2624 ConstAlways = true;
2625 break;
2626 }
2627 default:
2628 break;
2629 }
2630
2631 bool ConstWithoutErrnoAndExceptions =
2632 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2633 bool ConstWithoutExceptions =
2634 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2635
2636 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2637 // disabled.
2638 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2639 // or attributes that affect math-errno should prevent or allow math
2640 // intrinsics to be generated. Intrinsics are generated:
2641 // 1- In fast math mode, unless math-errno is overridden
2642 // via '#pragma float_control(precise, on)', or via an
2643 // '__attribute__((optnone))'.
2644 // 2- If math-errno was enabled on the command line but overridden
2645 // to false via '#pragma float_control(precise, off)' and
2646 // '__attribute__((optnone))' hasn't been used.
2647 // 3- If we are compiling with optimization and errno has been disabled
2648 // via '#pragma float_control(precise, off)', and
2649 // '__attribute__((optnone))' hasn't been used.
2650
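  // Editorial example of the rules above (not part of the original source):
  // for 'float f(float x) { return sqrtf(x); }', compiling with -fno-math-errno
  // (or with errno disabled via '#pragma float_control(precise, off)' at -O2 and
  // no optnone) lets the sqrt case in the switch below select llvm.sqrt.f32;
  // with math-errno in effect and none of the overrides present, the call stays
  // a plain libcall to sqrtf so that errno can still be observed.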
2651 bool ConstWithoutErrnoOrExceptions =
2652 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2653 bool GenerateIntrinsics =
2654 (ConstAlways && !OptNone) ||
2655 (!getLangOpts().MathErrno &&
2656 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2657 if (!GenerateIntrinsics) {
2658 GenerateIntrinsics =
2659 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2660 if (!GenerateIntrinsics)
2661 GenerateIntrinsics =
2662 ConstWithoutErrnoOrExceptions &&
2663 (!getLangOpts().MathErrno &&
2664 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2665 if (!GenerateIntrinsics)
2666 GenerateIntrinsics =
2667 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2668 }
2669 if (GenerateIntrinsics) {
2670 switch (BuiltinIDIfNoAsmLabel) {
2671 case Builtin::BIacos:
2672 case Builtin::BIacosf:
2673 case Builtin::BIacosl:
2674 case Builtin::BI__builtin_acos:
2675 case Builtin::BI__builtin_acosf:
2676 case Builtin::BI__builtin_acosf16:
2677 case Builtin::BI__builtin_acosl:
2678 case Builtin::BI__builtin_acosf128:
2680 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2681
2682 case Builtin::BIasin:
2683 case Builtin::BIasinf:
2684 case Builtin::BIasinl:
2685 case Builtin::BI__builtin_asin:
2686 case Builtin::BI__builtin_asinf:
2687 case Builtin::BI__builtin_asinf16:
2688 case Builtin::BI__builtin_asinl:
2689 case Builtin::BI__builtin_asinf128:
2691 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2692
2693 case Builtin::BIatan:
2694 case Builtin::BIatanf:
2695 case Builtin::BIatanl:
2696 case Builtin::BI__builtin_atan:
2697 case Builtin::BI__builtin_atanf:
2698 case Builtin::BI__builtin_atanf16:
2699 case Builtin::BI__builtin_atanl:
2700 case Builtin::BI__builtin_atanf128:
2702 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2703
2704 case Builtin::BIceil:
2705 case Builtin::BIceilf:
2706 case Builtin::BIceill:
2707 case Builtin::BI__builtin_ceil:
2708 case Builtin::BI__builtin_ceilf:
2709 case Builtin::BI__builtin_ceilf16:
2710 case Builtin::BI__builtin_ceill:
2711 case Builtin::BI__builtin_ceilf128:
2713 Intrinsic::ceil,
2714 Intrinsic::experimental_constrained_ceil));
2715
2716 case Builtin::BIcopysign:
2717 case Builtin::BIcopysignf:
2718 case Builtin::BIcopysignl:
2719 case Builtin::BI__builtin_copysign:
2720 case Builtin::BI__builtin_copysignf:
2721 case Builtin::BI__builtin_copysignf16:
2722 case Builtin::BI__builtin_copysignl:
2723 case Builtin::BI__builtin_copysignf128:
2724 return RValue::get(
2725 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2726
2727 case Builtin::BIcos:
2728 case Builtin::BIcosf:
2729 case Builtin::BIcosl:
2730 case Builtin::BI__builtin_cos:
2731 case Builtin::BI__builtin_cosf:
2732 case Builtin::BI__builtin_cosf16:
2733 case Builtin::BI__builtin_cosl:
2734 case Builtin::BI__builtin_cosf128:
2736 Intrinsic::cos,
2737 Intrinsic::experimental_constrained_cos));
2738
2739 case Builtin::BIcosh:
2740 case Builtin::BIcoshf:
2741 case Builtin::BIcoshl:
2742 case Builtin::BI__builtin_cosh:
2743 case Builtin::BI__builtin_coshf:
2744 case Builtin::BI__builtin_coshf16:
2745 case Builtin::BI__builtin_coshl:
2746 case Builtin::BI__builtin_coshf128:
2748 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
2749
2750 case Builtin::BIexp:
2751 case Builtin::BIexpf:
2752 case Builtin::BIexpl:
2753 case Builtin::BI__builtin_exp:
2754 case Builtin::BI__builtin_expf:
2755 case Builtin::BI__builtin_expf16:
2756 case Builtin::BI__builtin_expl:
2757 case Builtin::BI__builtin_expf128:
2759 Intrinsic::exp,
2760 Intrinsic::experimental_constrained_exp));
2761
2762 case Builtin::BIexp2:
2763 case Builtin::BIexp2f:
2764 case Builtin::BIexp2l:
2765 case Builtin::BI__builtin_exp2:
2766 case Builtin::BI__builtin_exp2f:
2767 case Builtin::BI__builtin_exp2f16:
2768 case Builtin::BI__builtin_exp2l:
2769 case Builtin::BI__builtin_exp2f128:
2771 Intrinsic::exp2,
2772 Intrinsic::experimental_constrained_exp2));
2773 case Builtin::BI__builtin_exp10:
2774 case Builtin::BI__builtin_exp10f:
2775 case Builtin::BI__builtin_exp10f16:
2776 case Builtin::BI__builtin_exp10l:
2777 case Builtin::BI__builtin_exp10f128: {
2778 // TODO: strictfp support
2779 if (Builder.getIsFPConstrained())
2780 break;
2781 return RValue::get(
2782 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
2783 }
2784 case Builtin::BIfabs:
2785 case Builtin::BIfabsf:
2786 case Builtin::BIfabsl:
2787 case Builtin::BI__builtin_fabs:
2788 case Builtin::BI__builtin_fabsf:
2789 case Builtin::BI__builtin_fabsf16:
2790 case Builtin::BI__builtin_fabsl:
2791 case Builtin::BI__builtin_fabsf128:
2792 return RValue::get(
2793 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
2794
2795 case Builtin::BIfloor:
2796 case Builtin::BIfloorf:
2797 case Builtin::BIfloorl:
2798 case Builtin::BI__builtin_floor:
2799 case Builtin::BI__builtin_floorf:
2800 case Builtin::BI__builtin_floorf16:
2801 case Builtin::BI__builtin_floorl:
2802 case Builtin::BI__builtin_floorf128:
2804 Intrinsic::floor,
2805 Intrinsic::experimental_constrained_floor));
2806
2807 case Builtin::BIfma:
2808 case Builtin::BIfmaf:
2809 case Builtin::BIfmal:
2810 case Builtin::BI__builtin_fma:
2811 case Builtin::BI__builtin_fmaf:
2812 case Builtin::BI__builtin_fmaf16:
2813 case Builtin::BI__builtin_fmal:
2814 case Builtin::BI__builtin_fmaf128:
2816 Intrinsic::fma,
2817 Intrinsic::experimental_constrained_fma));
2818
2819 case Builtin::BIfmax:
2820 case Builtin::BIfmaxf:
2821 case Builtin::BIfmaxl:
2822 case Builtin::BI__builtin_fmax:
2823 case Builtin::BI__builtin_fmaxf:
2824 case Builtin::BI__builtin_fmaxf16:
2825 case Builtin::BI__builtin_fmaxl:
2826 case Builtin::BI__builtin_fmaxf128:
2828 Intrinsic::maxnum,
2829 Intrinsic::experimental_constrained_maxnum));
2830
2831 case Builtin::BIfmin:
2832 case Builtin::BIfminf:
2833 case Builtin::BIfminl:
2834 case Builtin::BI__builtin_fmin:
2835 case Builtin::BI__builtin_fminf:
2836 case Builtin::BI__builtin_fminf16:
2837 case Builtin::BI__builtin_fminl:
2838 case Builtin::BI__builtin_fminf128:
2840 Intrinsic::minnum,
2841 Intrinsic::experimental_constrained_minnum));
2842
2843 // fmod() is a special-case. It maps to the frem instruction rather than an
2844 // LLVM intrinsic.
2845 case Builtin::BIfmod:
2846 case Builtin::BIfmodf:
2847 case Builtin::BIfmodl:
2848 case Builtin::BI__builtin_fmod:
2849 case Builtin::BI__builtin_fmodf:
2850 case Builtin::BI__builtin_fmodf16:
2851 case Builtin::BI__builtin_fmodl:
2852 case Builtin::BI__builtin_fmodf128: {
2853 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2854 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2855 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2856 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2857 }
2858
2859 case Builtin::BIlog:
2860 case Builtin::BIlogf:
2861 case Builtin::BIlogl:
2862 case Builtin::BI__builtin_log:
2863 case Builtin::BI__builtin_logf:
2864 case Builtin::BI__builtin_logf16:
2865 case Builtin::BI__builtin_logl:
2866 case Builtin::BI__builtin_logf128:
2868 Intrinsic::log,
2869 Intrinsic::experimental_constrained_log));
2870
2871 case Builtin::BIlog10:
2872 case Builtin::BIlog10f:
2873 case Builtin::BIlog10l:
2874 case Builtin::BI__builtin_log10:
2875 case Builtin::BI__builtin_log10f:
2876 case Builtin::BI__builtin_log10f16:
2877 case Builtin::BI__builtin_log10l:
2878 case Builtin::BI__builtin_log10f128:
2880 Intrinsic::log10,
2881 Intrinsic::experimental_constrained_log10));
2882
2883 case Builtin::BIlog2:
2884 case Builtin::BIlog2f:
2885 case Builtin::BIlog2l:
2886 case Builtin::BI__builtin_log2:
2887 case Builtin::BI__builtin_log2f:
2888 case Builtin::BI__builtin_log2f16:
2889 case Builtin::BI__builtin_log2l:
2890 case Builtin::BI__builtin_log2f128:
2892 Intrinsic::log2,
2893 Intrinsic::experimental_constrained_log2));
2894
2895 case Builtin::BInearbyint:
2896 case Builtin::BInearbyintf:
2897 case Builtin::BInearbyintl:
2898 case Builtin::BI__builtin_nearbyint:
2899 case Builtin::BI__builtin_nearbyintf:
2900 case Builtin::BI__builtin_nearbyintl:
2901 case Builtin::BI__builtin_nearbyintf128:
2903 Intrinsic::nearbyint,
2904 Intrinsic::experimental_constrained_nearbyint));
2905
2906 case Builtin::BIpow:
2907 case Builtin::BIpowf:
2908 case Builtin::BIpowl:
2909 case Builtin::BI__builtin_pow:
2910 case Builtin::BI__builtin_powf:
2911 case Builtin::BI__builtin_powf16:
2912 case Builtin::BI__builtin_powl:
2913 case Builtin::BI__builtin_powf128:
2915 Intrinsic::pow,
2916 Intrinsic::experimental_constrained_pow));
2917
2918 case Builtin::BIrint:
2919 case Builtin::BIrintf:
2920 case Builtin::BIrintl:
2921 case Builtin::BI__builtin_rint:
2922 case Builtin::BI__builtin_rintf:
2923 case Builtin::BI__builtin_rintf16:
2924 case Builtin::BI__builtin_rintl:
2925 case Builtin::BI__builtin_rintf128:
2927 Intrinsic::rint,
2928 Intrinsic::experimental_constrained_rint));
2929
2930 case Builtin::BIround:
2931 case Builtin::BIroundf:
2932 case Builtin::BIroundl:
2933 case Builtin::BI__builtin_round:
2934 case Builtin::BI__builtin_roundf:
2935 case Builtin::BI__builtin_roundf16:
2936 case Builtin::BI__builtin_roundl:
2937 case Builtin::BI__builtin_roundf128:
2939 Intrinsic::round,
2940 Intrinsic::experimental_constrained_round));
2941
2942 case Builtin::BIroundeven:
2943 case Builtin::BIroundevenf:
2944 case Builtin::BIroundevenl:
2945 case Builtin::BI__builtin_roundeven:
2946 case Builtin::BI__builtin_roundevenf:
2947 case Builtin::BI__builtin_roundevenf16:
2948 case Builtin::BI__builtin_roundevenl:
2949 case Builtin::BI__builtin_roundevenf128:
2951 Intrinsic::roundeven,
2952 Intrinsic::experimental_constrained_roundeven));
2953
2954 case Builtin::BIsin:
2955 case Builtin::BIsinf:
2956 case Builtin::BIsinl:
2957 case Builtin::BI__builtin_sin:
2958 case Builtin::BI__builtin_sinf:
2959 case Builtin::BI__builtin_sinf16:
2960 case Builtin::BI__builtin_sinl:
2961 case Builtin::BI__builtin_sinf128:
2963 Intrinsic::sin,
2964 Intrinsic::experimental_constrained_sin));
2965
2966 case Builtin::BIsinh:
2967 case Builtin::BIsinhf:
2968 case Builtin::BIsinhl:
2969 case Builtin::BI__builtin_sinh:
2970 case Builtin::BI__builtin_sinhf:
2971 case Builtin::BI__builtin_sinhf16:
2972 case Builtin::BI__builtin_sinhl:
2973 case Builtin::BI__builtin_sinhf128:
2975 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
2976
2977 case Builtin::BIsqrt:
2978 case Builtin::BIsqrtf:
2979 case Builtin::BIsqrtl:
2980 case Builtin::BI__builtin_sqrt:
2981 case Builtin::BI__builtin_sqrtf:
2982 case Builtin::BI__builtin_sqrtf16:
2983 case Builtin::BI__builtin_sqrtl:
2984 case Builtin::BI__builtin_sqrtf128:
2985 case Builtin::BI__builtin_elementwise_sqrt: {
2987 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2989 return RValue::get(Call);
2990 }
2991
2992 case Builtin::BItan:
2993 case Builtin::BItanf:
2994 case Builtin::BItanl:
2995 case Builtin::BI__builtin_tan:
2996 case Builtin::BI__builtin_tanf:
2997 case Builtin::BI__builtin_tanf16:
2998 case Builtin::BI__builtin_tanl:
2999 case Builtin::BI__builtin_tanf128:
3001 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3002
3003 case Builtin::BItanh:
3004 case Builtin::BItanhf:
3005 case Builtin::BItanhl:
3006 case Builtin::BI__builtin_tanh:
3007 case Builtin::BI__builtin_tanhf:
3008 case Builtin::BI__builtin_tanhf16:
3009 case Builtin::BI__builtin_tanhl:
3010 case Builtin::BI__builtin_tanhf128:
3012 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3013
3014 case Builtin::BItrunc:
3015 case Builtin::BItruncf:
3016 case Builtin::BItruncl:
3017 case Builtin::BI__builtin_trunc:
3018 case Builtin::BI__builtin_truncf:
3019 case Builtin::BI__builtin_truncf16:
3020 case Builtin::BI__builtin_truncl:
3021 case Builtin::BI__builtin_truncf128:
3023 Intrinsic::trunc,
3024 Intrinsic::experimental_constrained_trunc));
3025
3026 case Builtin::BIlround:
3027 case Builtin::BIlroundf:
3028 case Builtin::BIlroundl:
3029 case Builtin::BI__builtin_lround:
3030 case Builtin::BI__builtin_lroundf:
3031 case Builtin::BI__builtin_lroundl:
3032 case Builtin::BI__builtin_lroundf128:
3034 *this, E, Intrinsic::lround,
3035 Intrinsic::experimental_constrained_lround));
3036
3037 case Builtin::BIllround:
3038 case Builtin::BIllroundf:
3039 case Builtin::BIllroundl:
3040 case Builtin::BI__builtin_llround:
3041 case Builtin::BI__builtin_llroundf:
3042 case Builtin::BI__builtin_llroundl:
3043 case Builtin::BI__builtin_llroundf128:
3045 *this, E, Intrinsic::llround,
3046 Intrinsic::experimental_constrained_llround));
3047
3048 case Builtin::BIlrint:
3049 case Builtin::BIlrintf:
3050 case Builtin::BIlrintl:
3051 case Builtin::BI__builtin_lrint:
3052 case Builtin::BI__builtin_lrintf:
3053 case Builtin::BI__builtin_lrintl:
3054 case Builtin::BI__builtin_lrintf128:
3056 *this, E, Intrinsic::lrint,
3057 Intrinsic::experimental_constrained_lrint));
3058
3059 case Builtin::BIllrint:
3060 case Builtin::BIllrintf:
3061 case Builtin::BIllrintl:
3062 case Builtin::BI__builtin_llrint:
3063 case Builtin::BI__builtin_llrintf:
3064 case Builtin::BI__builtin_llrintl:
3065 case Builtin::BI__builtin_llrintf128:
3066 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3067 *this, E, Intrinsic::llrint,
3068 Intrinsic::experimental_constrained_llrint));
3069 case Builtin::BI__builtin_ldexp:
3070 case Builtin::BI__builtin_ldexpf:
3071 case Builtin::BI__builtin_ldexpl:
3072 case Builtin::BI__builtin_ldexpf16:
3073 case Builtin::BI__builtin_ldexpf128: {
3074 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3075 *this, E, Intrinsic::ldexp,
3076 Intrinsic::experimental_constrained_ldexp));
3077 }
3078 default:
3079 break;
3080 }
3081 }
3082
3083 // Check NonnullAttribute/NullabilityArg and Alignment.
3084 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3085 unsigned ParmNum) {
3086 Value *Val = A.emitRawPointer(*this);
3087 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3088 ParmNum);
3089
3090 if (SanOpts.has(SanitizerKind::Alignment)) {
3091 SanitizerSet SkippedChecks;
3092 SkippedChecks.set(SanitizerKind::All);
3093 SkippedChecks.clear(SanitizerKind::Alignment);
3094 SourceLocation Loc = Arg->getExprLoc();
3095 // Strip an implicit cast.
3096 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3097 if (CE->getCastKind() == CK_BitCast)
3098 Arg = CE->getSubExpr();
3099 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3100 SkippedChecks);
3101 }
3102 };
3103
3104 switch (BuiltinIDIfNoAsmLabel) {
3105 default: break;
3106 case Builtin::BI__builtin___CFStringMakeConstantString:
3107 case Builtin::BI__builtin___NSStringMakeConstantString:
3108 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3109 case Builtin::BI__builtin_stdarg_start:
3110 case Builtin::BI__builtin_va_start:
3111 case Builtin::BI__va_start:
3112 case Builtin::BI__builtin_va_end:
3113 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3114 ? EmitScalarExpr(E->getArg(0))
3115 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3116 BuiltinID != Builtin::BI__builtin_va_end);
3117 return RValue::get(nullptr);
3118 case Builtin::BI__builtin_va_copy: {
3119 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3120 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3121 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3122 {DstPtr, SrcPtr});
3123 return RValue::get(nullptr);
3124 }
3125 case Builtin::BIabs:
3126 case Builtin::BIlabs:
3127 case Builtin::BIllabs:
3128 case Builtin::BI__builtin_abs:
3129 case Builtin::BI__builtin_labs:
3130 case Builtin::BI__builtin_llabs: {
3131 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3132
3133 Value *Result;
3134 switch (getLangOpts().getSignedOverflowBehavior()) {
3135 case LangOptions::SOB_Defined:
3136 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3137 break;
3138 case LangOptions::SOB_Undefined:
3139 if (!SanitizeOverflow) {
3140 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3141 break;
3142 }
3143 [[fallthrough]];
3144 case LangOptions::SOB_Trapping:
3145 // TODO: Somehow handle the corner case when the address of abs is taken.
3146 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3147 break;
3148 }
3149 return RValue::get(Result);
3150 }
3151 case Builtin::BI__builtin_complex: {
3152 Value *Real = EmitScalarExpr(E->getArg(0));
3153 Value *Imag = EmitScalarExpr(E->getArg(1));
3154 return RValue::getComplex({Real, Imag});
3155 }
3156 case Builtin::BI__builtin_conj:
3157 case Builtin::BI__builtin_conjf:
3158 case Builtin::BI__builtin_conjl:
3159 case Builtin::BIconj:
3160 case Builtin::BIconjf:
3161 case Builtin::BIconjl: {
3162 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3163 Value *Real = ComplexVal.first;
3164 Value *Imag = ComplexVal.second;
3165 Imag = Builder.CreateFNeg(Imag, "neg");
3166 return RValue::getComplex(std::make_pair(Real, Imag));
3167 }
3168 case Builtin::BI__builtin_creal:
3169 case Builtin::BI__builtin_crealf:
3170 case Builtin::BI__builtin_creall:
3171 case Builtin::BIcreal:
3172 case Builtin::BIcrealf:
3173 case Builtin::BIcreall: {
3174 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3175 return RValue::get(ComplexVal.first);
3176 }
3177
3178 case Builtin::BI__builtin_preserve_access_index: {
3179 // Only enable the preserved access index region when debuginfo
3180 // is available, as debuginfo is needed to preserve the user-level
3181 // access pattern.
3182 if (!getDebugInfo()) {
3183 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3184 return RValue::get(EmitScalarExpr(E->getArg(0)));
3185 }
3186
3187 // Nested builtin_preserve_access_index() not supported
3188 if (IsInPreservedAIRegion) {
3189 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3190 return RValue::get(EmitScalarExpr(E->getArg(0)));
3191 }
3192
3193 IsInPreservedAIRegion = true;
3194 Value *Res = EmitScalarExpr(E->getArg(0));
3195 IsInPreservedAIRegion = false;
3196 return RValue::get(Res);
3197 }
3198
3199 case Builtin::BI__builtin_cimag:
3200 case Builtin::BI__builtin_cimagf:
3201 case Builtin::BI__builtin_cimagl:
3202 case Builtin::BIcimag:
3203 case Builtin::BIcimagf:
3204 case Builtin::BIcimagl: {
3205 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3206 return RValue::get(ComplexVal.second);
3207 }
3208
3209 case Builtin::BI__builtin_clrsb:
3210 case Builtin::BI__builtin_clrsbl:
3211 case Builtin::BI__builtin_clrsbll: {
3212 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3213 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3214
3215 llvm::Type *ArgType = ArgValue->getType();
3216 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3217
3218 llvm::Type *ResultType = ConvertType(E->getType());
3219 Value *Zero = llvm::Constant::getNullValue(ArgType);
3220 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3221 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3222 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3223 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3224 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3225 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3226 "cast");
3227 return RValue::get(Result);
3228 }
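// Illustrative sketch, not part of the original source: for a 32-bit argument,
// the block above emits roughly
//   %isneg  = icmp slt i32 %x, 0
//   %not    = xor i32 %x, -1
//   %tmp    = select i1 %isneg, i32 %not, i32 %x
//   %ctlz   = call i32 @llvm.ctlz.i32(i32 %tmp, i1 false)
//   %result = sub i32 %ctlz, 1
// i.e. the number of redundant sign bits in the argument.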
3229 case Builtin::BI__builtin_ctzs:
3230 case Builtin::BI__builtin_ctz:
3231 case Builtin::BI__builtin_ctzl:
3232 case Builtin::BI__builtin_ctzll:
3233 case Builtin::BI__builtin_ctzg: {
3234 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3235 E->getNumArgs() > 1;
3236
3237 Value *ArgValue =
3238 HasFallback ? EmitScalarExpr(E->getArg(0))
3239 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3240 
3241 llvm::Type *ArgType = ArgValue->getType();
3242 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3243
3244 llvm::Type *ResultType = ConvertType(E->getType());
3245 Value *ZeroUndef =
3246 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3247 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3248 if (Result->getType() != ResultType)
3249 Result =
3250 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3251 if (!HasFallback)
3252 return RValue::get(Result);
3253
3254 Value *Zero = Constant::getNullValue(ArgType);
3255 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3256 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3257 Value *ResultOrFallback =
3258 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3259 return RValue::get(ResultOrFallback);
3260 }
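// Illustrative sketch, not part of the original source: a two-argument call
// such as __builtin_ctzg(x, 32) sets the zero-is-poison flag on cttz and then
// selects the caller-provided fallback, roughly
//   %ctz    = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %iszero = icmp eq i32 %x, 0
//   %ctzg   = select i1 %iszero, i32 32, i32 %ctz
// The single-argument forms skip the select entirely.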
3261 case Builtin::BI__builtin_clzs:
3262 case Builtin::BI__builtin_clz:
3263 case Builtin::BI__builtin_clzl:
3264 case Builtin::BI__builtin_clzll:
3265 case Builtin::BI__builtin_clzg: {
3266 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3267 E->getNumArgs() > 1;
3268
3269 Value *ArgValue =
3270 HasFallback ? EmitScalarExpr(E->getArg(0))
3271 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3272 
3273 llvm::Type *ArgType = ArgValue->getType();
3274 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3275
3276 llvm::Type *ResultType = ConvertType(E->getType());
3277 Value *ZeroUndef =
3278 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3279 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3280 if (Result->getType() != ResultType)
3281 Result =
3282 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3283 if (!HasFallback)
3284 return RValue::get(Result);
3285
3286 Value *Zero = Constant::getNullValue(ArgType);
3287 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3288 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3289 Value *ResultOrFallback =
3290 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3291 return RValue::get(ResultOrFallback);
3292 }
3293 case Builtin::BI__builtin_ffs:
3294 case Builtin::BI__builtin_ffsl:
3295 case Builtin::BI__builtin_ffsll: {
3296 // ffs(x) -> x ? cttz(x) + 1 : 0
3297 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3298
3299 llvm::Type *ArgType = ArgValue->getType();
3300 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3301
3302 llvm::Type *ResultType = ConvertType(E->getType());
3303 Value *Tmp =
3304 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3305 llvm::ConstantInt::get(ArgType, 1));
3306 Value *Zero = llvm::Constant::getNullValue(ArgType);
3307 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3308 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3309 if (Result->getType() != ResultType)
3310 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3311 "cast");
3312 return RValue::get(Result);
3313 }
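// Illustrative sketch, not part of the original source: __builtin_ffs(x)
// becomes roughly
//   %cttz   = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %tmp    = add i32 %cttz, 1
//   %iszero = icmp eq i32 %x, 0
//   %ffs    = select i1 %iszero, i32 0, i32 %tmp
// i.e. a 1-based index of the least significant set bit, or 0 when x == 0.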
3314 case Builtin::BI__builtin_parity:
3315 case Builtin::BI__builtin_parityl:
3316 case Builtin::BI__builtin_parityll: {
3317 // parity(x) -> ctpop(x) & 1
3318 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3319
3320 llvm::Type *ArgType = ArgValue->getType();
3321 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3322
3323 llvm::Type *ResultType = ConvertType(E->getType());
3324 Value *Tmp = Builder.CreateCall(F, ArgValue);
3325 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3326 if (Result->getType() != ResultType)
3327 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3328 "cast");
3329 return RValue::get(Result);
3330 }
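// Illustrative sketch, not part of the original source: __builtin_parity(x)
// reduces to a population count masked to its low bit, roughly
//   %ctpop  = call i32 @llvm.ctpop.i32(i32 %x)
//   %parity = and i32 %ctpop, 1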
3331 case Builtin::BI__lzcnt16:
3332 case Builtin::BI__lzcnt:
3333 case Builtin::BI__lzcnt64: {
3334 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3335
3336 llvm::Type *ArgType = ArgValue->getType();
3337 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3338
3339 llvm::Type *ResultType = ConvertType(E->getType());
3340 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3341 if (Result->getType() != ResultType)
3342 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3343 "cast");
3344 return RValue::get(Result);
3345 }
3346 case Builtin::BI__popcnt16:
3347 case Builtin::BI__popcnt:
3348 case Builtin::BI__popcnt64:
3349 case Builtin::BI__builtin_popcount:
3350 case Builtin::BI__builtin_popcountl:
3351 case Builtin::BI__builtin_popcountll:
3352 case Builtin::BI__builtin_popcountg: {
3353 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3354
3355 llvm::Type *ArgType = ArgValue->getType();
3356 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3357
3358 llvm::Type *ResultType = ConvertType(E->getType());
3359 Value *Result = Builder.CreateCall(F, ArgValue);
3360 if (Result->getType() != ResultType)
3361 Result =
3362 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3363 return RValue::get(Result);
3364 }
3365 case Builtin::BI__builtin_unpredictable: {
3366 // Always return the argument of __builtin_unpredictable. LLVM does not
3367 // handle this builtin. Metadata for this builtin should be added directly
3368 // to instructions such as branches or switches that use it.
3369 return RValue::get(EmitScalarExpr(E->getArg(0)));
3370 }
3371 case Builtin::BI__builtin_expect: {
3372 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3373 llvm::Type *ArgType = ArgValue->getType();
3374
3375 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3376 // Don't generate llvm.expect on -O0 as the backend won't use it for
3377 // anything.
3378 // Note, we still IRGen ExpectedValue because it could have side-effects.
3379 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3380 return RValue::get(ArgValue);
3381
3382 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3383 Value *Result =
3384 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3385 return RValue::get(Result);
3386 }
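// Illustrative usage, not part of the original source:
//   if (__builtin_expect(err != 0, 0)) handle_error(err);
// At -O1 and above the condition is routed through the llvm.expect intrinsic
// at the argument's width; at -O0 the intrinsic is skipped and only the
// already-evaluated argument is returned.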
3387 case Builtin::BI__builtin_expect_with_probability: {
3388 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3389 llvm::Type *ArgType = ArgValue->getType();
3390
3391 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3392 llvm::APFloat Probability(0.0);
3393 const Expr *ProbArg = E->getArg(2);
3394 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3395 assert(EvalSucceed && "probability should be able to evaluate as float");
3396 (void)EvalSucceed;
3397 bool LoseInfo = false;
3398 Probability.convert(llvm::APFloat::IEEEdouble(),
3399 llvm::RoundingMode::Dynamic, &LoseInfo);
3400 llvm::Type *Ty = ConvertType(ProbArg->getType());
3401 Constant *Confidence = ConstantFP::get(Ty, Probability);
3402 // Don't generate llvm.expect.with.probability on -O0 as the backend
3403 // won't use it for anything.
3404 // Note, we still IRGen ExpectedValue because it could have side-effects.
3405 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3406 return RValue::get(ArgValue);
3407
3408 Function *FnExpect =
3409 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3410 Value *Result = Builder.CreateCall(
3411 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3412 return RValue::get(Result);
3413 }
3414 case Builtin::BI__builtin_assume_aligned: {
3415 const Expr *Ptr = E->getArg(0);
3416 Value *PtrValue = EmitScalarExpr(Ptr);
3417 Value *OffsetValue =
3418 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3419
3420 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3421 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3422 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3423 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3424 llvm::Value::MaximumAlignment);
3425
3426 emitAlignmentAssumption(PtrValue, Ptr,
3427 /*The expr loc is sufficient.*/ SourceLocation(),
3428 AlignmentCI, OffsetValue);
3429 return RValue::get(PtrValue);
3430 }
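// Illustrative usage, not part of the original source:
//   void *p = __builtin_assume_aligned(buf, 64);
// clamps the requested alignment to llvm::Value::MaximumAlignment, records it
// via emitAlignmentAssumption (an llvm.assume on the pointer), and returns the
// original pointer value unchanged.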
3431 case Builtin::BI__assume:
3432 case Builtin::BI__builtin_assume: {
3433 if (E->getArg(0)->HasSideEffects(getContext()))
3434 return RValue::get(nullptr);
3435
3436 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3437 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3438 Builder.CreateCall(FnAssume, ArgValue);
3439 return RValue::get(nullptr);
3440 }
3441 case Builtin::BI__builtin_assume_separate_storage: {
3442 const Expr *Arg0 = E->getArg(0);
3443 const Expr *Arg1 = E->getArg(1);
3444
3445 Value *Value0 = EmitScalarExpr(Arg0);
3446 Value *Value1 = EmitScalarExpr(Arg1);
3447
3448 Value *Values[] = {Value0, Value1};
3449 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3450 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3451 return RValue::get(nullptr);
3452 }
3453 case Builtin::BI__builtin_allow_runtime_check: {
3454 StringRef Kind =
3455 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3456 LLVMContext &Ctx = CGM.getLLVMContext();
3457 llvm::Value *Allow = Builder.CreateCall(
3458 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3459 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3460 return RValue::get(Allow);
3461 }
3462 case Builtin::BI__arithmetic_fence: {
3463 // Create the builtin call if FastMath is selected, and the target
3464 // supports the builtin, otherwise just return the argument.
3465 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3466 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3467 bool isArithmeticFenceEnabled =
3468 FMF.allowReassoc() &&
3469 getContext().getTargetInfo().checkArithmeticFenceSupported();
3470 QualType ArgType = E->getArg(0)->getType();
3471 if (ArgType->isComplexType()) {
3472 if (isArithmeticFenceEnabled) {
3473 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3474 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3475 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3476 ConvertType(ElementType));
3477 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3478 ConvertType(ElementType));
3479 return RValue::getComplex(std::make_pair(Real, Imag));
3480 }
3481 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3482 Value *Real = ComplexVal.first;
3483 Value *Imag = ComplexVal.second;
3484 return RValue::getComplex(std::make_pair(Real, Imag));
3485 }
3486 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3487 if (isArithmeticFenceEnabled)
3488 return RValue::get(
3489 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3490 return RValue::get(ArgValue);
3491 }
3492 case Builtin::BI__builtin_bswap16:
3493 case Builtin::BI__builtin_bswap32:
3494 case Builtin::BI__builtin_bswap64:
3495 case Builtin::BI_byteswap_ushort:
3496 case Builtin::BI_byteswap_ulong:
3497 case Builtin::BI_byteswap_uint64: {
3498 return RValue::get(
3499 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3500 }
3501 case Builtin::BI__builtin_bitreverse8:
3502 case Builtin::BI__builtin_bitreverse16:
3503 case Builtin::BI__builtin_bitreverse32:
3504 case Builtin::BI__builtin_bitreverse64: {
3505 return RValue::get(
3506 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3507 }
3508 case Builtin::BI__builtin_rotateleft8:
3509 case Builtin::BI__builtin_rotateleft16:
3510 case Builtin::BI__builtin_rotateleft32:
3511 case Builtin::BI__builtin_rotateleft64:
3512 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3513 case Builtin::BI_rotl16:
3514 case Builtin::BI_rotl:
3515 case Builtin::BI_lrotl:
3516 case Builtin::BI_rotl64:
3517 return emitRotate(E, false);
3518
3519 case Builtin::BI__builtin_rotateright8:
3520 case Builtin::BI__builtin_rotateright16:
3521 case Builtin::BI__builtin_rotateright32:
3522 case Builtin::BI__builtin_rotateright64:
3523 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3524 case Builtin::BI_rotr16:
3525 case Builtin::BI_rotr:
3526 case Builtin::BI_lrotr:
3527 case Builtin::BI_rotr64:
3528 return emitRotate(E, true);
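// Illustrative sketch, not part of the original source: emitRotate lowers both
// families to the funnel-shift intrinsics, so __builtin_rotateleft32(v, n) is
// roughly
//   %rot = call i32 @llvm.fshl.i32(i32 %v, i32 %v, i32 %n)
// and the rotate-right variants use @llvm.fshr.* in the same way.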
3529
3530 case Builtin::BI__builtin_constant_p: {
3531 llvm::Type *ResultType = ConvertType(E->getType());
3532
3533 const Expr *Arg = E->getArg(0);
3534 QualType ArgType = Arg->getType();
3535 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3536 // and likely a mistake.
3537 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3538 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3539 // Per the GCC documentation, only numeric constants are recognized after
3540 // inlining.
3541 return RValue::get(ConstantInt::get(ResultType, 0));
3542
3543 if (Arg->HasSideEffects(getContext()))
3544 // The argument is unevaluated, so be conservative if it might have
3545 // side-effects.
3546 return RValue::get(ConstantInt::get(ResultType, 0));
3547
3548 Value *ArgValue = EmitScalarExpr(Arg);
3549 if (ArgType->isObjCObjectPointerType()) {
3550 // Convert Objective-C objects to id because we cannot distinguish between
3551 // LLVM types for Obj-C classes as they are opaque.
3552 ArgType = CGM.getContext().getObjCIdType();
3553 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3554 }
3555 Function *F =
3556 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3557 Value *Result = Builder.CreateCall(F, ArgValue);
3558 if (Result->getType() != ResultType)
3559 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3560 return RValue::get(Result);
3561 }
3562 case Builtin::BI__builtin_dynamic_object_size:
3563 case Builtin::BI__builtin_object_size: {
3564 unsigned Type =
3565 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3566 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3567
3568 // We pass this builtin onto the optimizer so that it can figure out the
3569 // object size in more complex cases.
3570 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3571 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3572 /*EmittedE=*/nullptr, IsDynamic));
3573 }
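// Illustrative note, not part of the original source: emitBuiltinObjectSize
// forwards to the @llvm.objectsize intrinsic; the evaluated type argument
// selects minimum vs. maximum estimates, and IsDynamic chooses the
// dynamically-evaluated mode used by __builtin_dynamic_object_size.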
3574 case Builtin::BI__builtin_prefetch: {
3575 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3576 // FIXME: Technically these constants should be of type 'int', yes?
3577 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3578 llvm::ConstantInt::get(Int32Ty, 0);
3579 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3580 llvm::ConstantInt::get(Int32Ty, 3);
3581 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3582 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3583 Builder.CreateCall(F, {Address, RW, Locality, Data});
3584 return RValue::get(nullptr);
3585 }
3586 case Builtin::BI__builtin_readcyclecounter: {
3587 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3588 return RValue::get(Builder.CreateCall(F));
3589 }
3590 case Builtin::BI__builtin_readsteadycounter: {
3591 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3592 return RValue::get(Builder.CreateCall(F));
3593 }
3594 case Builtin::BI__builtin___clear_cache: {
3595 Value *Begin = EmitScalarExpr(E->getArg(0));
3596 Value *End = EmitScalarExpr(E->getArg(1));
3597 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3598 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3599 }
3600 case Builtin::BI__builtin_trap:
3601 EmitTrapCall(Intrinsic::trap);
3602 return RValue::get(nullptr);
3603 case Builtin::BI__builtin_verbose_trap: {
3604 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3605 if (getDebugInfo()) {
3606 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3607 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3608 *E->getArg(1)->tryEvaluateString(getContext()));
3609 }
3610 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3611 // Currently no attempt is made to prevent traps from being merged.
3612 EmitTrapCall(Intrinsic::trap);
3613 return RValue::get(nullptr);
3614 }
3615 case Builtin::BI__debugbreak:
3616 EmitTrapCall(Intrinsic::debugtrap);
3617 return RValue::get(nullptr);
3618 case Builtin::BI__builtin_unreachable: {
3619 EmitUnreachable(E->getExprLoc());
3620 
3621 // We do need to preserve an insertion point.
3622 EmitBlock(createBasicBlock("unreachable.cont"));
3623
3624 return RValue::get(nullptr);
3625 }
3626
3627 case Builtin::BI__builtin_powi:
3628 case Builtin::BI__builtin_powif:
3629 case Builtin::BI__builtin_powil: {
3630 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3631 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3632
3633 if (Builder.getIsFPConstrained()) {
3634 // FIXME: llvm.powi has 2 mangling types,
3635 // llvm.experimental.constrained.powi has one.
3636 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3637 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3638 Src0->getType());
3639 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3640 }
3641
3642 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3643 { Src0->getType(), Src1->getType() });
3644 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3645 }
3646 case Builtin::BI__builtin_frexpl: {
3647 // Linux PPC will not be adding additional PPCDoubleDouble support.
3648 // WIP to switch default to IEEE long double. Will emit libcall for
3649 // frexpl instead of legalizing this type in the BE.
3650 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3651 break;
3652 [[fallthrough]];
3653 }
3654 case Builtin::BI__builtin_frexp:
3655 case Builtin::BI__builtin_frexpf:
3656 case Builtin::BI__builtin_frexpf128:
3657 case Builtin::BI__builtin_frexpf16:
3658 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3659 case Builtin::BI__builtin_isgreater:
3660 case Builtin::BI__builtin_isgreaterequal:
3661 case Builtin::BI__builtin_isless:
3662 case Builtin::BI__builtin_islessequal:
3663 case Builtin::BI__builtin_islessgreater:
3664 case Builtin::BI__builtin_isunordered: {
3665 // Ordered comparisons: we know the arguments to these are matching scalar
3666 // floating point values.
3667 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3668 Value *LHS = EmitScalarExpr(E->getArg(0));
3669 Value *RHS = EmitScalarExpr(E->getArg(1));
3670
3671 switch (BuiltinID) {
3672 default: llvm_unreachable("Unknown ordered comparison");
3673 case Builtin::BI__builtin_isgreater:
3674 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3675 break;
3676 case Builtin::BI__builtin_isgreaterequal:
3677 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3678 break;
3679 case Builtin::BI__builtin_isless:
3680 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3681 break;
3682 case Builtin::BI__builtin_islessequal:
3683 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3684 break;
3685 case Builtin::BI__builtin_islessgreater:
3686 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3687 break;
3688 case Builtin::BI__builtin_isunordered:
3689 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3690 break;
3691 }
3692 // ZExt bool to int type.
3693 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3694 }
3695
3696 case Builtin::BI__builtin_isnan: {
3697 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3698 Value *V = EmitScalarExpr(E->getArg(0));
3699 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3700 return RValue::get(Result);
3701 return RValue::get(
3702 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3703 ConvertType(E->getType())));
3704 }
3705
3706 case Builtin::BI__builtin_issignaling: {
3707 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3708 Value *V = EmitScalarExpr(E->getArg(0));
3709 return RValue::get(
3710 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3711 ConvertType(E->getType())));
3712 }
3713
3714 case Builtin::BI__builtin_isinf: {
3715 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3716 Value *V = EmitScalarExpr(E->getArg(0));
3717 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3718 return RValue::get(Result);
3719 return RValue::get(
3720 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3721 ConvertType(E->getType())));
3722 }
3723
3724 case Builtin::BIfinite:
3725 case Builtin::BI__finite:
3726 case Builtin::BIfinitef:
3727 case Builtin::BI__finitef:
3728 case Builtin::BIfinitel:
3729 case Builtin::BI__finitel:
3730 case Builtin::BI__builtin_isfinite: {
3731 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3732 Value *V = EmitScalarExpr(E->getArg(0));
3733 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3734 return RValue::get(Result);
3735 return RValue::get(
3736 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3737 ConvertType(E->getType())));
3738 }
3739
3740 case Builtin::BI__builtin_isnormal: {
3741 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3742 Value *V = EmitScalarExpr(E->getArg(0));
3743 return RValue::get(
3744 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3745 ConvertType(E->getType())));
3746 }
3747
3748 case Builtin::BI__builtin_issubnormal: {
3749 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3750 Value *V = EmitScalarExpr(E->getArg(0));
3751 return RValue::get(
3752 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3753 ConvertType(E->getType())));
3754 }
3755
3756 case Builtin::BI__builtin_iszero: {
3757 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3758 Value *V = EmitScalarExpr(E->getArg(0));
3759 return RValue::get(
3760 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3761 ConvertType(E->getType())));
3762 }
3763
3764 case Builtin::BI__builtin_isfpclass: {
3765 Expr::EvalResult Result;
3766 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3767 break;
3768 uint64_t Test = Result.Val.getInt().getLimitedValue();
3769 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3770 Value *V = EmitScalarExpr(E->getArg(0));
3771 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3772 ConvertType(E->getType())));
3773 }
3774
3775 case Builtin::BI__builtin_nondeterministic_value: {
3776 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3777
3778 Value *Result = PoisonValue::get(Ty);
3779 Result = Builder.CreateFreeze(Result);
3780
3781 return RValue::get(Result);
3782 }
3783
3784 case Builtin::BI__builtin_elementwise_abs: {
3785 Value *Result;
3786 QualType QT = E->getArg(0)->getType();
3787
3788 if (auto *VecTy = QT->getAs<VectorType>())
3789 QT = VecTy->getElementType();
3790 if (QT->isIntegerType())
3791 Result = Builder.CreateBinaryIntrinsic(
3792 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3793 Builder.getFalse(), nullptr, "elt.abs");
3794 else
3795 Result = emitBuiltinWithOneOverloadedType<1>(
3796 *this, E, llvm::Intrinsic::fabs, "elt.abs");
3797
3798 return RValue::get(Result);
3799 }
3800 case Builtin::BI__builtin_elementwise_acos:
3801 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3802 *this, E, llvm::Intrinsic::acos, "elt.acos"));
3803 case Builtin::BI__builtin_elementwise_asin:
3804 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3805 *this, E, llvm::Intrinsic::asin, "elt.asin"));
3806 case Builtin::BI__builtin_elementwise_atan:
3807 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3808 *this, E, llvm::Intrinsic::atan, "elt.atan"));
3809 case Builtin::BI__builtin_elementwise_ceil:
3810 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3811 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3812 case Builtin::BI__builtin_elementwise_exp:
3813 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3814 *this, E, llvm::Intrinsic::exp, "elt.exp"));
3815 case Builtin::BI__builtin_elementwise_exp2:
3816 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3817 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3818 case Builtin::BI__builtin_elementwise_log:
3819 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3820 *this, E, llvm::Intrinsic::log, "elt.log"));
3821 case Builtin::BI__builtin_elementwise_log2:
3822 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3823 *this, E, llvm::Intrinsic::log2, "elt.log2"));
3824 case Builtin::BI__builtin_elementwise_log10:
3825 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3826 *this, E, llvm::Intrinsic::log10, "elt.log10"));
3827 case Builtin::BI__builtin_elementwise_pow: {
3828 return RValue::get(
3829 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
3830 }
3831 case Builtin::BI__builtin_elementwise_bitreverse:
3832 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3833 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
3834 case Builtin::BI__builtin_elementwise_cos:
3835 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3836 *this, E, llvm::Intrinsic::cos, "elt.cos"));
3837 case Builtin::BI__builtin_elementwise_cosh:
3838 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3839 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
3840 case Builtin::BI__builtin_elementwise_floor:
3841 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3842 *this, E, llvm::Intrinsic::floor, "elt.floor"));
3843 case Builtin::BI__builtin_elementwise_roundeven:
3844 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3845 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
3846 case Builtin::BI__builtin_elementwise_round:
3847 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3848 *this, E, llvm::Intrinsic::round, "elt.round"));
3849 case Builtin::BI__builtin_elementwise_rint:
3850 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3851 *this, E, llvm::Intrinsic::rint, "elt.rint"));
3852 case Builtin::BI__builtin_elementwise_nearbyint:
3853 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3854 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
3855 case Builtin::BI__builtin_elementwise_sin:
3856 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3857 *this, E, llvm::Intrinsic::sin, "elt.sin"));
3858 case Builtin::BI__builtin_elementwise_sinh:
3859 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3860 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
3861 case Builtin::BI__builtin_elementwise_tan:
3862 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3863 *this, E, llvm::Intrinsic::tan, "elt.tan"));
3864 case Builtin::BI__builtin_elementwise_tanh:
3865 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3866 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
3867 case Builtin::BI__builtin_elementwise_trunc:
3868 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3869 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3870 case Builtin::BI__builtin_elementwise_canonicalize:
3871 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3872 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3873 case Builtin::BI__builtin_elementwise_copysign:
3874 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
3875 *this, E, llvm::Intrinsic::copysign));
3876 case Builtin::BI__builtin_elementwise_fma:
3877 return RValue::get(
3878 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
3879 case Builtin::BI__builtin_elementwise_add_sat:
3880 case Builtin::BI__builtin_elementwise_sub_sat: {
3881 Value *Op0 = EmitScalarExpr(E->getArg(0));
3882 Value *Op1 = EmitScalarExpr(E->getArg(1));
3883 Value *Result;
3884 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3885 QualType Ty = E->getArg(0)->getType();
3886 if (auto *VecTy = Ty->getAs<VectorType>())
3887 Ty = VecTy->getElementType();
3888 bool IsSigned = Ty->isSignedIntegerType();
3889 unsigned Opc;
3890 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3891 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3892 else
3893 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3894 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3895 return RValue::get(Result);
3896 }
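// Illustrative sketch, not part of the original source: with a vector of
// signed 16-bit elements, __builtin_elementwise_add_sat(a, b) becomes roughly
//   %elt.sat = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// while unsigned and subtracting operands select uadd.sat, ssub.sat or
// usub.sat as chosen above.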
3897
3898 case Builtin::BI__builtin_elementwise_max: {
3899 Value *Op0 = EmitScalarExpr(E->getArg(0));
3900 Value *Op1 = EmitScalarExpr(E->getArg(1));
3901 Value *Result;
3902 if (Op0->getType()->isIntOrIntVectorTy()) {
3903 QualType Ty = E->getArg(0)->getType();
3904 if (auto *VecTy = Ty->getAs<VectorType>())
3905 Ty = VecTy->getElementType();
3906 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3907 ? llvm::Intrinsic::smax
3908 : llvm::Intrinsic::umax,
3909 Op0, Op1, nullptr, "elt.max");
3910 } else
3911 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3912 return RValue::get(Result);
3913 }
3914 case Builtin::BI__builtin_elementwise_min: {
3915 Value *Op0 = EmitScalarExpr(E->getArg(0));
3916 Value *Op1 = EmitScalarExpr(E->getArg(1));
3917 Value *Result;
3918 if (Op0->getType()->isIntOrIntVectorTy()) {
3919 QualType Ty = E->getArg(0)->getType();
3920 if (auto *VecTy = Ty->getAs<VectorType>())
3921 Ty = VecTy->getElementType();
3922 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3923 ? llvm::Intrinsic::smin
3924 : llvm::Intrinsic::umin,
3925 Op0, Op1, nullptr, "elt.min");
3926 } else
3927 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3928 return RValue::get(Result);
3929 }
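// Illustrative note, not part of the original source: integer operands pick
// @llvm.smax/@llvm.umax (resp. @llvm.smin/@llvm.umin) based on the element
// signedness, while floating-point operands go through CreateMaxNum /
// CreateMinNum, i.e. @llvm.maxnum / @llvm.minnum.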
3930
3931 case Builtin::BI__builtin_reduce_max: {
3932 auto GetIntrinsicID = [this](QualType QT) {
3933 if (auto *VecTy = QT->getAs<VectorType>())
3934 QT = VecTy->getElementType();
3935 else if (QT->isSizelessVectorType())
3936 QT = QT->getSizelessVectorEltType(CGM.getContext());
3937 
3938 if (QT->isSignedIntegerType())
3939 return llvm::Intrinsic::vector_reduce_smax;
3940 if (QT->isUnsignedIntegerType())
3941 return llvm::Intrinsic::vector_reduce_umax;
3942 assert(QT->isFloatingType() && "must have a float here");
3943 return llvm::Intrinsic::vector_reduce_fmax;
3944 };
3945 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3946 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3947 }
3948
3949 case Builtin::BI__builtin_reduce_min: {
3950 auto GetIntrinsicID = [this](QualType QT) {
3951 if (auto *VecTy = QT->getAs<VectorType>())
3952 QT = VecTy->getElementType();
3953 else if (QT->isSizelessVectorType())
3954 QT = QT->getSizelessVectorEltType(CGM.getContext());
3955 
3956 if (QT->isSignedIntegerType())
3957 return llvm::Intrinsic::vector_reduce_smin;
3958 if (QT->isUnsignedIntegerType())
3959 return llvm::Intrinsic::vector_reduce_umin;
3960 assert(QT->isFloatingType() && "must have a float here");
3961 return llvm::Intrinsic::vector_reduce_fmin;
3962 };
3963
3964 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3965 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3966 }
3967
3968 case Builtin::BI__builtin_reduce_add:
3969 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3970 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3971 case Builtin::BI__builtin_reduce_mul:
3972 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3973 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3974 case Builtin::BI__builtin_reduce_xor:
3975 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3976 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3977 case Builtin::BI__builtin_reduce_or:
3978 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3979 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3980 case Builtin::BI__builtin_reduce_and:
3981 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3982 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3983
3984 case Builtin::BI__builtin_matrix_transpose: {
3985 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3986 Value *MatValue = EmitScalarExpr(E->getArg(0));
3987 MatrixBuilder MB(Builder);
3988 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3989 MatrixTy->getNumColumns());
3990 return RValue::get(Result);
3991 }
3992
3993 case Builtin::BI__builtin_matrix_column_major_load: {
3994 MatrixBuilder MB(Builder);
3995 // Emit everything that isn't dependent on the first parameter type
3996 Value *Stride = EmitScalarExpr(E->getArg(3));
3997 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3998 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3999 assert(PtrTy && "arg0 must be of pointer type");
4000 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4001
4002 Address Src = EmitPointerWithAlignment(E->getArg(0));
4003 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4004 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4005 0);
4006 Value *Result = MB.CreateColumnMajorLoad(
4007 Src.getElementType(), Src.emitRawPointer(*this),
4008 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4009 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4010 return RValue::get(Result);
4011 }
4012
4013 case Builtin::BI__builtin_matrix_column_major_store: {
4014 MatrixBuilder MB(Builder);
4015 Value *Matrix = EmitScalarExpr(E->getArg(0));
4016 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4017 Value *Stride = EmitScalarExpr(E->getArg(2));
4018
4019 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4020 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4021 assert(PtrTy && "arg1 must be of pointer type");
4022 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4023
4024 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4025 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4026 0);
4027 Value *Result = MB.CreateColumnMajorStore(
4028 Matrix, Dst.emitRawPointer(*this),
4029 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4030 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4031 return RValue::get(Result);
4032 }
4033
4034 case Builtin::BI__builtin_isinf_sign: {
4035 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4036 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4037 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4038 Value *Arg = EmitScalarExpr(E->getArg(0));
4039 Value *AbsArg = EmitFAbs(*this, Arg);
4040 Value *IsInf = Builder.CreateFCmpOEQ(
4041 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4042 Value *IsNeg = EmitSignBit(*this, Arg);
4043
4044 llvm::Type *IntTy = ConvertType(E->getType());
4045 Value *Zero = Constant::getNullValue(IntTy);
4046 Value *One = ConstantInt::get(IntTy, 1);
4047 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4048 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4049 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4050 return RValue::get(Result);
4051 }
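// Illustrative sketch, not part of the original source: for a double argument
// the block above produces roughly
//   %isinf = fcmp oeq double %fabs, 0x7FF0000000000000
//   %sign  = select i1 %isneg, i32 -1, i32 1
//   %res   = select i1 %isinf, i32 %sign, i32 0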
4052
4053 case Builtin::BI__builtin_flt_rounds: {
4054 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4055
4056 llvm::Type *ResultType = ConvertType(E->getType());
4057 Value *Result = Builder.CreateCall(F);
4058 if (Result->getType() != ResultType)
4059 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4060 "cast");
4061 return RValue::get(Result);
4062 }
4063
4064 case Builtin::BI__builtin_set_flt_rounds: {
4065 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4066
4067 Value *V = EmitScalarExpr(E->getArg(0));
4068 Builder.CreateCall(F, V);
4069 return RValue::get(nullptr);
4070 }
4071
4072 case Builtin::BI__builtin_fpclassify: {
4073 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4074 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4075 Value *V = EmitScalarExpr(E->getArg(5));
4076 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4077
4078 // Create Result
4079 BasicBlock *Begin = Builder.GetInsertBlock();
4080 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4081 Builder.SetInsertPoint(End);
4082 PHINode *Result =
4083 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4084 "fpclassify_result");
4085
4086 // if (V==0) return FP_ZERO
4087 Builder.SetInsertPoint(Begin);
4088 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4089 "iszero");
4090 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4091 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4092 Builder.CreateCondBr(IsZero, End, NotZero);
4093 Result->addIncoming(ZeroLiteral, Begin);
4094
4095 // if (V != V) return FP_NAN
4096 Builder.SetInsertPoint(NotZero);
4097 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4098 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4099 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4100 Builder.CreateCondBr(IsNan, End, NotNan);
4101 Result->addIncoming(NanLiteral, NotZero);
4102
4103 // if (fabs(V) == infinity) return FP_INFINITY
4104 Builder.SetInsertPoint(NotNan);
4105 Value *VAbs = EmitFAbs(*this, V);
4106 Value *IsInf =
4107 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4108 "isinf");
4109 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4110 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4111 Builder.CreateCondBr(IsInf, End, NotInf);
4112 Result->addIncoming(InfLiteral, NotNan);
4113
4114 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4115 Builder.SetInsertPoint(NotInf);
4116 APFloat Smallest = APFloat::getSmallestNormalized(
4117 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4118 Value *IsNormal =
4119 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4120 "isnormal");
4121 Value *NormalResult =
4122 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4123 EmitScalarExpr(E->getArg(3)));
4124 Builder.CreateBr(End);
4125 Result->addIncoming(NormalResult, NotInf);
4126
4127 // return Result
4128 Builder.SetInsertPoint(End);
4129 return RValue::get(Result);
4130 }
4131
4132 // An alloca will always return a pointer to the alloca (stack) address
4133 // space. This address space need not be the same as the AST / Language
4134 // default (e.g. in C / C++ auto vars are in the generic address space). At
4135 // the AST level this is handled within CreateTempAlloca et al., but for the
4136 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4137 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4138 case Builtin::BIalloca:
4139 case Builtin::BI_alloca:
4140 case Builtin::BI__builtin_alloca_uninitialized:
4141 case Builtin::BI__builtin_alloca: {
4142 Value *Size = EmitScalarExpr(E->getArg(0));
4143 const TargetInfo &TI = getContext().getTargetInfo();
4144 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4145 const Align SuitableAlignmentInBytes =
4146 CGM.getContext()
4147 .toCharUnitsFromBits(TI.getSuitableAlign())
4148 .getAsAlign();
4149 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4150 AI->setAlignment(SuitableAlignmentInBytes);
4151 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4152 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4153 LangAS AAS = getASTAllocaAddressSpace();
4154 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4155 if (AAS != EAS) {
4156 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4157 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4158 EAS, Ty));
4159 }
4160 return RValue::get(AI);
4161 }
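// Illustrative usage, not part of the original source:
//   char *buf = (char *)__builtin_alloca(n);
// emits an i8 alloca of the requested size aligned to __BIGGEST_ALIGNMENT__,
// zero- or pattern-initializes it via initializeAlloca() under
// -ftrivial-auto-var-init, and inserts an address-space cast when the alloca
// address space differs from the expression's pointer address space.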
4162
4163 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4164 case Builtin::BI__builtin_alloca_with_align: {
4165 Value *Size = EmitScalarExpr(E->getArg(0));
4166 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4167 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4168 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4169 const Align AlignmentInBytes =
4170 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4171 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4172 AI->setAlignment(AlignmentInBytes);
4173 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4174 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4175 LangAS AAS = getASTAllocaAddressSpace();
4176 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4177 if (AAS != EAS) {
4178 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4179 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4180 EAS, Ty));
4181 }
4182 return RValue::get(AI);
4183 }
4184
4185 case Builtin::BIbzero:
4186 case Builtin::BI__builtin_bzero: {
4187 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4188 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4189 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4190 E->getArg(0)->getExprLoc(), FD, 0);
4191 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4192 return RValue::get(nullptr);
4193 }
4194
4195 case Builtin::BIbcopy:
4196 case Builtin::BI__builtin_bcopy: {
4197 Address Src = EmitPointerWithAlignment(E->getArg(0));
4198 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4199 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4200 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4201 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4202 0);
4203 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4204 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4205 0);
4206 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4207 return RValue::get(nullptr);
4208 }
4209
4210 case Builtin::BImemcpy:
4211 case Builtin::BI__builtin_memcpy:
4212 case Builtin::BImempcpy:
4213 case Builtin::BI__builtin_mempcpy: {
4214 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4215 Address Src = EmitPointerWithAlignment(E->getArg(1));
4216 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4217 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4218 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4219 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4220 if (BuiltinID == Builtin::BImempcpy ||
4221 BuiltinID == Builtin::BI__builtin_mempcpy)
4222 return RValue::get(Builder.CreateInBoundsGEP(
4223 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4224 else
4225 return RValue::get(Dest, *this);
4226 }
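// Illustrative note, not part of the original source: both spellings emit an
// ordinary memcpy; mempcpy additionally returns dest + size through the
// inbounds GEP above, whereas memcpy returns the destination pointer itself.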
4227
4228 case Builtin::BI__builtin_memcpy_inline: {
4229 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4230 Address Src = EmitPointerWithAlignment(E->getArg(1));
4231 uint64_t Size =
4232 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4233 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4234 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4235 Builder.CreateMemCpyInline(Dest, Src, Size);
4236 return RValue::get(nullptr);
4237 }
4238
4239 case Builtin::BI__builtin_char_memchr:
4240 BuiltinID = Builtin::BI__builtin_memchr;
4241 break;
4242
4243 case Builtin::BI__builtin___memcpy_chk: {
4244 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4245 Expr::EvalResult SizeResult, DstSizeResult;
4246 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4247 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4248 break;
4249 llvm::APSInt Size = SizeResult.Val.getInt();
4250 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4251 if (Size.ugt(DstSize))
4252 break;
4253 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4254 Address Src = EmitPointerWithAlignment(E->getArg(1));
4255 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4256 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4257 return RValue::get(Dest, *this);
4258 }
4259
4260 case Builtin::BI__builtin_objc_memmove_collectable: {
4261 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4262 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4263 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4264 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4265 DestAddr, SrcAddr, SizeVal);
4266 return RValue::get(DestAddr, *this);
4267 }
4268
4269 case Builtin::BI__builtin___memmove_chk: {
4270 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4271 Expr::EvalResult SizeResult, DstSizeResult;
4272 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4273 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4274 break;
4275 llvm::APSInt Size = SizeResult.Val.getInt();
4276 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4277 if (Size.ugt(DstSize))
4278 break;
4279 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4280 Address Src = EmitPointerWithAlignment(E->getArg(1));
4281 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4282 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4283 return RValue::get(Dest, *this);
4284 }
4285
4286 case Builtin::BImemmove:
4287 case Builtin::BI__builtin_memmove: {
4288 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4289 Address Src = EmitPointerWithAlignment(E->getArg(1));
4290 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4291 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4292 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4293 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4294 return RValue::get(Dest, *this);
4295 }
4296 case Builtin::BImemset:
4297 case Builtin::BI__builtin_memset: {
4298 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4299 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4300 Builder.getInt8Ty());
4301 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4302 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4303 E->getArg(0)->getExprLoc(), FD, 0);
4304 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4305 return RValue::get(Dest, *this);
4306 }
4307 case Builtin::BI__builtin_memset_inline: {
4308 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4309 Value *ByteVal =
4310 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4311 uint64_t Size =
4312 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4313 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4314 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4315 0);
4316 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4317 return RValue::get(nullptr);
4318 }
4319 case Builtin::BI__builtin___memset_chk: {
4320 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4321 Expr::EvalResult SizeResult, DstSizeResult;
4322 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4323 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4324 break;
4325 llvm::APSInt Size = SizeResult.Val.getInt();
4326 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4327 if (Size.ugt(DstSize))
4328 break;
4329 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4330 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4331 Builder.getInt8Ty());
4332 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4333 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4334 return RValue::get(Dest, *this);
4335 }
4336 case Builtin::BI__builtin_wmemchr: {
4337 // The MSVC runtime library does not provide a definition of wmemchr, so we
4338 // need an inline implementation.
4339 if (!getTarget().getTriple().isOSMSVCRT())
4340 break;
4341
4342 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4343 Value *Str = EmitScalarExpr(E->getArg(0));
4344 Value *Chr = EmitScalarExpr(E->getArg(1));
4345 Value *Size = EmitScalarExpr(E->getArg(2));
4346
4347 BasicBlock *Entry = Builder.GetInsertBlock();
4348 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4349 BasicBlock *Next = createBasicBlock("wmemchr.next");
4350 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4351 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4352 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4353
4354 EmitBlock(CmpEq);
4355 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4356 StrPhi->addIncoming(Str, Entry);
4357 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4358 SizePhi->addIncoming(Size, Entry);
4359 CharUnits WCharAlign =
4360 getContext().getTypeAlignInChars(getContext().WCharTy);
4361 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4362 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4363 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4364 Builder.CreateCondBr(StrEqChr, Exit, Next);
4365
4366 EmitBlock(Next);
4367 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4368 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4369 Value *NextSizeEq0 =
4370 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4371 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4372 StrPhi->addIncoming(NextStr, Next);
4373 SizePhi->addIncoming(NextSize, Next);
4374
4375 EmitBlock(Exit);
4376 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4377 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4378 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4379 Ret->addIncoming(FoundChr, CmpEq);
4380 return RValue::get(Ret);
4381 }
4382 case Builtin::BI__builtin_wmemcmp: {
4383 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4384 // need an inline implementation.
4385 if (!getTarget().getTriple().isOSMSVCRT())
4386 break;
4387
4388 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4389
4390 Value *Dst = EmitScalarExpr(E->getArg(0));
4391 Value *Src = EmitScalarExpr(E->getArg(1));
4392 Value *Size = EmitScalarExpr(E->getArg(2));
4393
4394 BasicBlock *Entry = Builder.GetInsertBlock();
4395 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4396 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4397 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4398 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4399 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4400 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4401
4402 EmitBlock(CmpGT);
4403 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4404 DstPhi->addIncoming(Dst, Entry);
4405 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4406 SrcPhi->addIncoming(Src, Entry);
4407 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4408 SizePhi->addIncoming(Size, Entry);
4409 CharUnits WCharAlign =
4410 getContext().getTypeAlignInChars(getContext().WCharTy);
4411 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4412 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4413 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4414 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4415
4416 EmitBlock(CmpLT);
4417 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4418 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4419
4420 EmitBlock(Next);
4421 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4422 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4423 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4424 Value *NextSizeEq0 =
4425 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4426 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4427 DstPhi->addIncoming(NextDst, Next);
4428 SrcPhi->addIncoming(NextSrc, Next);
4429 SizePhi->addIncoming(NextSize, Next);
4430
4431 EmitBlock(Exit);
4432 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4433 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4434 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4435 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4436 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4437 return RValue::get(Ret);
4438 }
4439 case Builtin::BI__builtin_dwarf_cfa: {
4440 // The offset in bytes from the first argument to the CFA.
4441 //
4442 // Why on earth is this in the frontend? Is there any reason at
4443 // all that the backend can't reasonably determine this while
4444 // lowering llvm.eh.dwarf.cfa()?
4445 //
4446 // TODO: If there's a satisfactory reason, add a target hook for
4447 // this instead of hard-coding 0, which is correct for most targets.
4448 int32_t Offset = 0;
4449
4450 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4451 return RValue::get(Builder.CreateCall(F,
4452 llvm::ConstantInt::get(Int32Ty, Offset)));
4453 }
4454 case Builtin::BI__builtin_return_address: {
4455 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4456 getContext().UnsignedIntTy);
4457 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4458 return RValue::get(Builder.CreateCall(F, Depth));
4459 }
4460 case Builtin::BI_ReturnAddress: {
4461 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4462 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4463 }
4464 case Builtin::BI__builtin_frame_address: {
4465 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4466 getContext().UnsignedIntTy);
4467 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4468 return RValue::get(Builder.CreateCall(F, Depth));
4469 }
4470 case Builtin::BI__builtin_extract_return_addr: {
4471 Value *Address = EmitScalarExpr(E->getArg(0));
4472 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4473 return RValue::get(Result);
4474 }
4475 case Builtin::BI__builtin_frob_return_addr: {
4476 Value *Address = EmitScalarExpr(E->getArg(0));
4477 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4478 return RValue::get(Result);
4479 }
4480 case Builtin::BI__builtin_dwarf_sp_column: {
4481 llvm::IntegerType *Ty
4482 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4483 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4484 if (Column == -1) {
4485 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4486 return RValue::get(llvm::UndefValue::get(Ty));
4487 }
4488 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4489 }
4490 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4491 Value *Address = EmitScalarExpr(E->getArg(0));
4492 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4493 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4494 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4495 }
4496 case Builtin::BI__builtin_eh_return: {
4497 Value *Int = EmitScalarExpr(E->getArg(0));
4498 Value *Ptr = EmitScalarExpr(E->getArg(1));
4499
4500 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4501 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4502 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4503 Function *F =
4504 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4505 : Intrinsic::eh_return_i64);
4506 Builder.CreateCall(F, {Int, Ptr});
4507 Builder.CreateUnreachable();
4508
4509 // We do need to preserve an insertion point.
4510 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4511
4512 return RValue::get(nullptr);
4513 }
4514 case Builtin::BI__builtin_unwind_init: {
4515 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4516 Builder.CreateCall(F);
4517 return RValue::get(nullptr);
4518 }
4519 case Builtin::BI__builtin_extend_pointer: {
4520 // Extends a pointer to the size of an _Unwind_Word, which is
4521 // uint64_t on all platforms. Generally this gets poked into a
4522 // register and eventually used as an address, so if the
4523 // addressing registers are wider than pointers and the platform
4524 // doesn't implicitly ignore high-order bits when doing
4525 // addressing, we need to make sure we zext / sext based on
4526 // the platform's expectations.
4527 //
4528 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4529
4530 // Cast the pointer to intptr_t.
4531 Value *Ptr = EmitScalarExpr(E->getArg(0));
4532 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4533
4534 // If that's 64 bits, we're done.
4535 if (IntPtrTy->getBitWidth() == 64)
4536 return RValue::get(Result);
4537
4538 // Otherwise, ask the codegen data what to do.
4539 if (getTargetHooks().extendPointerWithSExt())
4540 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4541 else
4542 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4543 }
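// Illustrative sketch (not part of the source): on a 32-bit target whose
// target hooks request sign extension, the lowering above is roughly
//
//   %1 = ptrtoint ptr %p to i32      ; "extend.cast"
//   %2 = sext i32 %1 to i64          ; "extend.sext" (zext otherwise)
//
// while on 64-bit targets the ptrtoint result is already an i64 and is
// returned unchanged.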
4544 case Builtin::BI__builtin_setjmp: {
4545 // Buffer is a void**.
4546 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4547
4548 // Store the frame pointer to the setjmp buffer.
4549 Value *FrameAddr = Builder.CreateCall(
4550 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4551 ConstantInt::get(Int32Ty, 0));
4552 Builder.CreateStore(FrameAddr, Buf);
4553
4554 // Store the stack pointer to the setjmp buffer.
4555 Value *StackAddr = Builder.CreateStackSave();
4556 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4557
4558 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4559 Builder.CreateStore(StackAddr, StackSaveSlot);
4560
4561 // Call LLVM's EH setjmp, which is lightweight.
4562 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4563 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4564 }
4565 case Builtin::BI__builtin_longjmp: {
4566 Value *Buf = EmitScalarExpr(E->getArg(0));
4567
4568 // Call LLVM's EH longjmp, which is lightweight.
4569 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4570
4571 // longjmp doesn't return; mark this as unreachable.
4572 Builder.CreateUnreachable();
4573
4574 // We do need to preserve an insertion point.
4575 EmitBlock(createBasicBlock("longjmp.cont"));
4576
4577 return RValue::get(nullptr);
4578 }
4579 case Builtin::BI__builtin_launder: {
4580 const Expr *Arg = E->getArg(0);
4581 QualType ArgTy = Arg->getType()->getPointeeType();
4582 Value *Ptr = EmitScalarExpr(Arg);
4583 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4584 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4585
4586 return RValue::get(Ptr);
4587 }
4588 case Builtin::BI__sync_fetch_and_add:
4589 case Builtin::BI__sync_fetch_and_sub:
4590 case Builtin::BI__sync_fetch_and_or:
4591 case Builtin::BI__sync_fetch_and_and:
4592 case Builtin::BI__sync_fetch_and_xor:
4593 case Builtin::BI__sync_fetch_and_nand:
4594 case Builtin::BI__sync_add_and_fetch:
4595 case Builtin::BI__sync_sub_and_fetch:
4596 case Builtin::BI__sync_and_and_fetch:
4597 case Builtin::BI__sync_or_and_fetch:
4598 case Builtin::BI__sync_xor_and_fetch:
4599 case Builtin::BI__sync_nand_and_fetch:
4600 case Builtin::BI__sync_val_compare_and_swap:
4601 case Builtin::BI__sync_bool_compare_and_swap:
4602 case Builtin::BI__sync_lock_test_and_set:
4603 case Builtin::BI__sync_lock_release:
4604 case Builtin::BI__sync_swap:
4605 llvm_unreachable("Shouldn't make it through sema");
4606 case Builtin::BI__sync_fetch_and_add_1:
4607 case Builtin::BI__sync_fetch_and_add_2:
4608 case Builtin::BI__sync_fetch_and_add_4:
4609 case Builtin::BI__sync_fetch_and_add_8:
4610 case Builtin::BI__sync_fetch_and_add_16:
4611 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4612 case Builtin::BI__sync_fetch_and_sub_1:
4613 case Builtin::BI__sync_fetch_and_sub_2:
4614 case Builtin::BI__sync_fetch_and_sub_4:
4615 case Builtin::BI__sync_fetch_and_sub_8:
4616 case Builtin::BI__sync_fetch_and_sub_16:
4617 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4618 case Builtin::BI__sync_fetch_and_or_1:
4619 case Builtin::BI__sync_fetch_and_or_2:
4620 case Builtin::BI__sync_fetch_and_or_4:
4621 case Builtin::BI__sync_fetch_and_or_8:
4622 case Builtin::BI__sync_fetch_and_or_16:
4623 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4624 case Builtin::BI__sync_fetch_and_and_1:
4625 case Builtin::BI__sync_fetch_and_and_2:
4626 case Builtin::BI__sync_fetch_and_and_4:
4627 case Builtin::BI__sync_fetch_and_and_8:
4628 case Builtin::BI__sync_fetch_and_and_16:
4629 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4630 case Builtin::BI__sync_fetch_and_xor_1:
4631 case Builtin::BI__sync_fetch_and_xor_2:
4632 case Builtin::BI__sync_fetch_and_xor_4:
4633 case Builtin::BI__sync_fetch_and_xor_8:
4634 case Builtin::BI__sync_fetch_and_xor_16:
4635 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4636 case Builtin::BI__sync_fetch_and_nand_1:
4637 case Builtin::BI__sync_fetch_and_nand_2:
4638 case Builtin::BI__sync_fetch_and_nand_4:
4639 case Builtin::BI__sync_fetch_and_nand_8:
4640 case Builtin::BI__sync_fetch_and_nand_16:
4641 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4642
4643 // Clang extensions: not overloaded yet.
4644 case Builtin::BI__sync_fetch_and_min:
4645 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4646 case Builtin::BI__sync_fetch_and_max:
4647 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4648 case Builtin::BI__sync_fetch_and_umin:
4649 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4650 case Builtin::BI__sync_fetch_and_umax:
4651 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4652
4653 case Builtin::BI__sync_add_and_fetch_1:
4654 case Builtin::BI__sync_add_and_fetch_2:
4655 case Builtin::BI__sync_add_and_fetch_4:
4656 case Builtin::BI__sync_add_and_fetch_8:
4657 case Builtin::BI__sync_add_and_fetch_16:
4658 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4659 llvm::Instruction::Add);
4660 case Builtin::BI__sync_sub_and_fetch_1:
4661 case Builtin::BI__sync_sub_and_fetch_2:
4662 case Builtin::BI__sync_sub_and_fetch_4:
4663 case Builtin::BI__sync_sub_and_fetch_8:
4664 case Builtin::BI__sync_sub_and_fetch_16:
4665 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4666 llvm::Instruction::Sub);
4667 case Builtin::BI__sync_and_and_fetch_1:
4668 case Builtin::BI__sync_and_and_fetch_2:
4669 case Builtin::BI__sync_and_and_fetch_4:
4670 case Builtin::BI__sync_and_and_fetch_8:
4671 case Builtin::BI__sync_and_and_fetch_16:
4672 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4673 llvm::Instruction::And);
4674 case Builtin::BI__sync_or_and_fetch_1:
4675 case Builtin::BI__sync_or_and_fetch_2:
4676 case Builtin::BI__sync_or_and_fetch_4:
4677 case Builtin::BI__sync_or_and_fetch_8:
4678 case Builtin::BI__sync_or_and_fetch_16:
4679 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4680 llvm::Instruction::Or);
4681 case Builtin::BI__sync_xor_and_fetch_1:
4682 case Builtin::BI__sync_xor_and_fetch_2:
4683 case Builtin::BI__sync_xor_and_fetch_4:
4684 case Builtin::BI__sync_xor_and_fetch_8:
4685 case Builtin::BI__sync_xor_and_fetch_16:
4686 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4687 llvm::Instruction::Xor);
4688 case Builtin::BI__sync_nand_and_fetch_1:
4689 case Builtin::BI__sync_nand_and_fetch_2:
4690 case Builtin::BI__sync_nand_and_fetch_4:
4691 case Builtin::BI__sync_nand_and_fetch_8:
4692 case Builtin::BI__sync_nand_and_fetch_16:
4693 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4694 llvm::Instruction::And, true);
4695
4696 case Builtin::BI__sync_val_compare_and_swap_1:
4697 case Builtin::BI__sync_val_compare_and_swap_2:
4698 case Builtin::BI__sync_val_compare_and_swap_4:
4699 case Builtin::BI__sync_val_compare_and_swap_8:
4700 case Builtin::BI__sync_val_compare_and_swap_16:
4701 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4702
4703 case Builtin::BI__sync_bool_compare_and_swap_1:
4704 case Builtin::BI__sync_bool_compare_and_swap_2:
4705 case Builtin::BI__sync_bool_compare_and_swap_4:
4706 case Builtin::BI__sync_bool_compare_and_swap_8:
4707 case Builtin::BI__sync_bool_compare_and_swap_16:
4708 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4709
4710 case Builtin::BI__sync_swap_1:
4711 case Builtin::BI__sync_swap_2:
4712 case Builtin::BI__sync_swap_4:
4713 case Builtin::BI__sync_swap_8:
4714 case Builtin::BI__sync_swap_16:
4715 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4716
4717 case Builtin::BI__sync_lock_test_and_set_1:
4718 case Builtin::BI__sync_lock_test_and_set_2:
4719 case Builtin::BI__sync_lock_test_and_set_4:
4720 case Builtin::BI__sync_lock_test_and_set_8:
4721 case Builtin::BI__sync_lock_test_and_set_16:
4722 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4723
4724 case Builtin::BI__sync_lock_release_1:
4725 case Builtin::BI__sync_lock_release_2:
4726 case Builtin::BI__sync_lock_release_4:
4727 case Builtin::BI__sync_lock_release_8:
4728 case Builtin::BI__sync_lock_release_16: {
4729 Address Ptr = CheckAtomicAlignment(*this, E);
4730 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4731
4732 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4733 getContext().getTypeSize(ElTy));
4734 llvm::StoreInst *Store =
4735 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4736 Store->setAtomic(llvm::AtomicOrdering::Release);
4737 return RValue::get(nullptr);
4738 }
4739
4740 case Builtin::BI__sync_synchronize: {
4741 // We assume this is supposed to correspond to a C++0x-style
4742 // sequentially-consistent fence (i.e. this is only usable for
4743 // synchronization, not device I/O or anything like that). This intrinsic
4744 // is really badly designed in the sense that in theory, there isn't
4745 // any way to safely use it... but in practice, it mostly works
4746 // to use it with non-atomic loads and stores to get acquire/release
4747 // semantics.
4748 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4749 return RValue::get(nullptr);
4750 }
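// Illustrative sketch (not part of the source): a call such as
//
//   __sync_synchronize();
//
// therefore lowers to a single `fence seq_cst` instruction.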
4751
4752 case Builtin::BI__builtin_nontemporal_load:
4753 return RValue::get(EmitNontemporalLoad(*this, E));
4754 case Builtin::BI__builtin_nontemporal_store:
4755 return RValue::get(EmitNontemporalStore(*this, E));
4756 case Builtin::BI__c11_atomic_is_lock_free:
4757 case Builtin::BI__atomic_is_lock_free: {
4758 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4759 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4760 // _Atomic(T) is always properly-aligned.
4761 const char *LibCallName = "__atomic_is_lock_free";
4762 CallArgList Args;
4763 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4764 getContext().getSizeType());
4765 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4766 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4767 getContext().VoidPtrTy);
4768 else
4769 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4770 getContext().VoidPtrTy);
4771 const CGFunctionInfo &FuncInfo =
4772 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4773 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4774 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4775 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4776 ReturnValueSlot(), Args);
4777 }
4778
4779 case Builtin::BI__atomic_test_and_set: {
4780 // Look at the argument type to determine whether this is a volatile
4781 // operation. The parameter type is always volatile.
4782 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4783 bool Volatile =
4784 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4785
4786 Address Ptr =
4787 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4788
4789 Value *NewVal = Builder.getInt8(1);
4790 Value *Order = EmitScalarExpr(E->getArg(1));
4791 if (isa<llvm::ConstantInt>(Order)) {
4792 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4793 AtomicRMWInst *Result = nullptr;
4794 switch (ord) {
4795 case 0: // memory_order_relaxed
4796 default: // invalid order
4797 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4798 llvm::AtomicOrdering::Monotonic);
4799 break;
4800 case 1: // memory_order_consume
4801 case 2: // memory_order_acquire
4802 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4803 llvm::AtomicOrdering::Acquire);
4804 break;
4805 case 3: // memory_order_release
4806 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4807 llvm::AtomicOrdering::Release);
4808 break;
4809 case 4: // memory_order_acq_rel
4810
4811 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4812 llvm::AtomicOrdering::AcquireRelease);
4813 break;
4814 case 5: // memory_order_seq_cst
4815 Result = Builder.CreateAtomicRMW(
4816 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4817 llvm::AtomicOrdering::SequentiallyConsistent);
4818 break;
4819 }
4820 Result->setVolatile(Volatile);
4821 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4822 }
4823
4824 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4825
4826 llvm::BasicBlock *BBs[5] = {
4827 createBasicBlock("monotonic", CurFn),
4828 createBasicBlock("acquire", CurFn),
4829 createBasicBlock("release", CurFn),
4830 createBasicBlock("acqrel", CurFn),
4831 createBasicBlock("seqcst", CurFn)
4832 };
4833 llvm::AtomicOrdering Orders[5] = {
4834 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4835 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4836 llvm::AtomicOrdering::SequentiallyConsistent};
4837
4838 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4839 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4840
4841 Builder.SetInsertPoint(ContBB);
4842 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4843
4844 for (unsigned i = 0; i < 5; ++i) {
4845 Builder.SetInsertPoint(BBs[i]);
4846 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4847 Ptr, NewVal, Orders[i]);
4848 RMW->setVolatile(Volatile);
4849 Result->addIncoming(RMW, BBs[i]);
4850 Builder.CreateBr(ContBB);
4851 }
4852
4853 SI->addCase(Builder.getInt32(0), BBs[0]);
4854 SI->addCase(Builder.getInt32(1), BBs[1]);
4855 SI->addCase(Builder.getInt32(2), BBs[1]);
4856 SI->addCase(Builder.getInt32(3), BBs[2]);
4857 SI->addCase(Builder.getInt32(4), BBs[3]);
4858 SI->addCase(Builder.getInt32(5), BBs[4]);
4859
4860 Builder.SetInsertPoint(ContBB);
4861 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4862 }
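// Illustrative sketch (not part of the source): with a constant ordering,
//
//   bool was_set = __atomic_test_and_set(&flag, __ATOMIC_ACQUIRE);
//
// becomes a single `atomicrmw xchg ptr %flag, i8 1 acquire` followed by a
// comparison of the old value against zero; a non-constant ordering takes
// the switch-based path emitted above.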
4863
4864 case Builtin::BI__atomic_clear: {
4865 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4866 bool Volatile =
4867 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4868
4869 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4870 Ptr = Ptr.withElementType(Int8Ty);
4871 Value *NewVal = Builder.getInt8(0);
4872 Value *Order = EmitScalarExpr(E->getArg(1));
4873 if (isa<llvm::ConstantInt>(Order)) {
4874 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4875 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4876 switch (ord) {
4877 case 0: // memory_order_relaxed
4878 default: // invalid order
4879 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4880 break;
4881 case 3: // memory_order_release
4882 Store->setOrdering(llvm::AtomicOrdering::Release);
4883 break;
4884 case 5: // memory_order_seq_cst
4885 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4886 break;
4887 }
4888 return RValue::get(nullptr);
4889 }
4890
4891 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4892
4893 llvm::BasicBlock *BBs[3] = {
4894 createBasicBlock("monotonic", CurFn),
4895 createBasicBlock("release", CurFn),
4896 createBasicBlock("seqcst", CurFn)
4897 };
4898 llvm::AtomicOrdering Orders[3] = {
4899 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4900 llvm::AtomicOrdering::SequentiallyConsistent};
4901
4902 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4903 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4904
4905 for (unsigned i = 0; i < 3; ++i) {
4906 Builder.SetInsertPoint(BBs[i]);
4907 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4908 Store->setOrdering(Orders[i]);
4909 Builder.CreateBr(ContBB);
4910 }
4911
4912 SI->addCase(Builder.getInt32(0), BBs[0]);
4913 SI->addCase(Builder.getInt32(3), BBs[1]);
4914 SI->addCase(Builder.getInt32(5), BBs[2]);
4915
4916 Builder.SetInsertPoint(ContBB);
4917 return RValue::get(nullptr);
4918 }
4919
4920 case Builtin::BI__atomic_thread_fence:
4921 case Builtin::BI__atomic_signal_fence:
4922 case Builtin::BI__c11_atomic_thread_fence:
4923 case Builtin::BI__c11_atomic_signal_fence: {
4924 llvm::SyncScope::ID SSID;
4925 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4926 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4927 SSID = llvm::SyncScope::SingleThread;
4928 else
4929 SSID = llvm::SyncScope::System;
4930 Value *Order = EmitScalarExpr(E->getArg(0));
4931 if (isa<llvm::ConstantInt>(Order)) {
4932 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4933 switch (ord) {
4934 case 0: // memory_order_relaxed
4935 default: // invalid order
4936 break;
4937 case 1: // memory_order_consume
4938 case 2: // memory_order_acquire
4939 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4940 break;
4941 case 3: // memory_order_release
4942 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4943 break;
4944 case 4: // memory_order_acq_rel
4945 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4946 break;
4947 case 5: // memory_order_seq_cst
4948 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4949 break;
4950 }
4951 return RValue::get(nullptr);
4952 }
4953
4954 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4955 AcquireBB = createBasicBlock("acquire", CurFn);
4956 ReleaseBB = createBasicBlock("release", CurFn);
4957 AcqRelBB = createBasicBlock("acqrel", CurFn);
4958 SeqCstBB = createBasicBlock("seqcst", CurFn);
4959 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4960
4961 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4962 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4963
4964 Builder.SetInsertPoint(AcquireBB);
4965 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4966 Builder.CreateBr(ContBB);
4967 SI->addCase(Builder.getInt32(1), AcquireBB);
4968 SI->addCase(Builder.getInt32(2), AcquireBB);
4969
4970 Builder.SetInsertPoint(ReleaseBB);
4971 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4972 Builder.CreateBr(ContBB);
4973 SI->addCase(Builder.getInt32(3), ReleaseBB);
4974
4975 Builder.SetInsertPoint(AcqRelBB);
4976 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4977 Builder.CreateBr(ContBB);
4978 SI->addCase(Builder.getInt32(4), AcqRelBB);
4979
4980 Builder.SetInsertPoint(SeqCstBB);
4981 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4982 Builder.CreateBr(ContBB);
4983 SI->addCase(Builder.getInt32(5), SeqCstBB);
4984
4985 Builder.SetInsertPoint(ContBB);
4986 return RValue::get(nullptr);
4987 }
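// Illustrative sketch (not part of the source): with a constant ordering,
//
//   __atomic_thread_fence(__ATOMIC_RELEASE);  // -> fence release
//   __atomic_signal_fence(__ATOMIC_SEQ_CST);  // -> fence syncscope("singlethread") seq_cst
//
// while a runtime ordering goes through the switch emitted above, and relaxed
// orderings produce no fence at all.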
4988
4989 case Builtin::BI__builtin_signbit:
4990 case Builtin::BI__builtin_signbitf:
4991 case Builtin::BI__builtin_signbitl: {
4992 return RValue::get(
4993 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4994 ConvertType(E->getType())));
4995 }
4996 case Builtin::BI__warn_memset_zero_len:
4997 return RValue::getIgnored();
4998 case Builtin::BI__annotation: {
4999 // Re-encode each wide string to UTF8 and make an MDString.
5000 SmallVector<llvm::Metadata *, 1> Strings;
5001 for (const Expr *Arg : E->arguments()) {
5002 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5003 assert(Str->getCharByteWidth() == 2);
5004 StringRef WideBytes = Str->getBytes();
5005 std::string StrUtf8;
5006 if (!convertUTF16ToUTF8String(
5007 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5008 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5009 continue;
5010 }
5011 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5012 }
5013
5014 // Build an MDTuple of MDStrings and emit the intrinsic call.
5015 llvm::Function *F =
5016 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5017 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5018 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5019 return RValue::getIgnored();
5020 }
5021 case Builtin::BI__builtin_annotation: {
5022 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5023 llvm::Function *F =
5024 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5025 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5026
5027 // Get the annotation string, go through casts. Sema requires this to be a
5028 // non-wide string literal, potentially casted, so the cast<> is safe.
5029 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5030 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5031 return RValue::get(
5032 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5033 }
5034 case Builtin::BI__builtin_addcb:
5035 case Builtin::BI__builtin_addcs:
5036 case Builtin::BI__builtin_addc:
5037 case Builtin::BI__builtin_addcl:
5038 case Builtin::BI__builtin_addcll:
5039 case Builtin::BI__builtin_subcb:
5040 case Builtin::BI__builtin_subcs:
5041 case Builtin::BI__builtin_subc:
5042 case Builtin::BI__builtin_subcl:
5043 case Builtin::BI__builtin_subcll: {
5044
5045 // We translate all of these builtins from expressions of the form:
5046 // int x = ..., y = ..., carryin = ..., carryout, result;
5047 // result = __builtin_addc(x, y, carryin, &carryout);
5048 //
5049 // to LLVM IR of the form:
5050 //
5051 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5052 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5053 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5054 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5055 // i32 %carryin)
5056 // %result = extractvalue {i32, i1} %tmp2, 0
5057 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5058 // %tmp3 = or i1 %carry1, %carry2
5059 // %tmp4 = zext i1 %tmp3 to i32
5060 // store i32 %tmp4, i32* %carryout
5061
5062 // Scalarize our inputs.
5063 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5064 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5065 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5066 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5067
5068 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5069 llvm::Intrinsic::ID IntrinsicId;
5070 switch (BuiltinID) {
5071 default: llvm_unreachable("Unknown multiprecision builtin id.");
5072 case Builtin::BI__builtin_addcb:
5073 case Builtin::BI__builtin_addcs:
5074 case Builtin::BI__builtin_addc:
5075 case Builtin::BI__builtin_addcl:
5076 case Builtin::BI__builtin_addcll:
5077 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5078 break;
5079 case Builtin::BI__builtin_subcb:
5080 case Builtin::BI__builtin_subcs:
5081 case Builtin::BI__builtin_subc:
5082 case Builtin::BI__builtin_subcl:
5083 case Builtin::BI__builtin_subcll:
5084 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5085 break;
5086 }
5087
5088 // Construct our resulting LLVM IR expression.
5089 llvm::Value *Carry1;
5090 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5091 X, Y, Carry1);
5092 llvm::Value *Carry2;
5093 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5094 Sum1, Carryin, Carry2);
5095 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5096 X->getType());
5097 Builder.CreateStore(CarryOut, CarryOutPtr);
5098 return RValue::get(Sum2);
5099 }
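// Illustrative sketch (not part of the source): the carry chaining above is
// what lets callers build multi-word arithmetic, e.g. adding two values made
// of 32-bit limbs:
//
//   unsigned carry, lo, hi;
//   lo = __builtin_addc(a_lo, b_lo, 0, &carry);
//   hi = __builtin_addc(a_hi, b_hi, carry, &carry);
//
// where each call expands to the pair of uadd.with.overflow intrinsics shown
// in the comment above.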
5100
5101 case Builtin::BI__builtin_add_overflow:
5102 case Builtin::BI__builtin_sub_overflow:
5103 case Builtin::BI__builtin_mul_overflow: {
5104 const clang::Expr *LeftArg = E->getArg(0);
5105 const clang::Expr *RightArg = E->getArg(1);
5106 const clang::Expr *ResultArg = E->getArg(2);
5107
5108 clang::QualType ResultQTy =
5109 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5110
5111 WidthAndSignedness LeftInfo =
5112 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5113 WidthAndSignedness RightInfo =
5114 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5115 WidthAndSignedness ResultInfo =
5116 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5117
5118 // Handle mixed-sign multiplication as a special case, because adding
5119 // runtime or backend support for our generic irgen would be too expensive.
5120 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5121 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5122 RightInfo, ResultArg, ResultQTy,
5123 ResultInfo);
5124
5125 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5126 ResultInfo))
5127 return EmitCheckedUnsignedMultiplySignedResult(
5128 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5129 ResultInfo);
5130
5131 WidthAndSignedness EncompassingInfo =
5132 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5133
5134 llvm::Type *EncompassingLLVMTy =
5135 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5136
5137 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5138
5139 llvm::Intrinsic::ID IntrinsicId;
5140 switch (BuiltinID) {
5141 default:
5142 llvm_unreachable("Unknown overflow builtin id.");
5143 case Builtin::BI__builtin_add_overflow:
5144 IntrinsicId = EncompassingInfo.Signed
5145 ? llvm::Intrinsic::sadd_with_overflow
5146 : llvm::Intrinsic::uadd_with_overflow;
5147 break;
5148 case Builtin::BI__builtin_sub_overflow:
5149 IntrinsicId = EncompassingInfo.Signed
5150 ? llvm::Intrinsic::ssub_with_overflow
5151 : llvm::Intrinsic::usub_with_overflow;
5152 break;
5153 case Builtin::BI__builtin_mul_overflow:
5154 IntrinsicId = EncompassingInfo.Signed
5155 ? llvm::Intrinsic::smul_with_overflow
5156 : llvm::Intrinsic::umul_with_overflow;
5157 break;
5158 }
5159
5160 llvm::Value *Left = EmitScalarExpr(LeftArg);
5161 llvm::Value *Right = EmitScalarExpr(RightArg);
5162 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5163
5164 // Extend each operand to the encompassing type.
5165 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5166 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5167
5168 // Perform the operation on the extended values.
5169 llvm::Value *Overflow, *Result;
5170 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5171
5172 if (EncompassingInfo.Width > ResultInfo.Width) {
5173 // The encompassing type is wider than the result type, so we need to
5174 // truncate it.
5175 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5176
5177 // To see if the truncation caused an overflow, we will extend
5178 // the result and then compare it to the original result.
5179 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5180 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5181 llvm::Value *TruncationOverflow =
5182 Builder.CreateICmpNE(Result, ResultTruncExt);
5183
5184 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5185 Result = ResultTrunc;
5186 }
5187
5188 // Finally, store the result using the pointer.
5189 bool isVolatile =
5190 ResultArg->getType()->getPointeeType().isVolatileQualified();
5191 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5192
5193 return RValue::get(Overflow);
5194 }
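// Illustrative sketch (not part of the source): because the operands and the
// result may all have different integer types, e.g.
//
//   long a = ...; unsigned b = ...; short res;
//   bool ovf = __builtin_add_overflow(a, b, &res);
//
// the code above widens both operands to the encompassing type, performs the
// checked operation there, and reports overflow if either the intrinsic
// overflowed or the truncation back to the result type changed the value.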
5195
5196 case Builtin::BI__builtin_uadd_overflow:
5197 case Builtin::BI__builtin_uaddl_overflow:
5198 case Builtin::BI__builtin_uaddll_overflow:
5199 case Builtin::BI__builtin_usub_overflow:
5200 case Builtin::BI__builtin_usubl_overflow:
5201 case Builtin::BI__builtin_usubll_overflow:
5202 case Builtin::BI__builtin_umul_overflow:
5203 case Builtin::BI__builtin_umull_overflow:
5204 case Builtin::BI__builtin_umulll_overflow:
5205 case Builtin::BI__builtin_sadd_overflow:
5206 case Builtin::BI__builtin_saddl_overflow:
5207 case Builtin::BI__builtin_saddll_overflow:
5208 case Builtin::BI__builtin_ssub_overflow:
5209 case Builtin::BI__builtin_ssubl_overflow:
5210 case Builtin::BI__builtin_ssubll_overflow:
5211 case Builtin::BI__builtin_smul_overflow:
5212 case Builtin::BI__builtin_smull_overflow:
5213 case Builtin::BI__builtin_smulll_overflow: {
5214
5215 // We translate all of these builtins directly to the relevant llvm IR node.
5216
5217 // Scalarize our inputs.
5218 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5219 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5220 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5221
5222 // Decide which of the overflow intrinsics we are lowering to:
5223 llvm::Intrinsic::ID IntrinsicId;
5224 switch (BuiltinID) {
5225 default: llvm_unreachable("Unknown overflow builtin id.");
5226 case Builtin::BI__builtin_uadd_overflow:
5227 case Builtin::BI__builtin_uaddl_overflow:
5228 case Builtin::BI__builtin_uaddll_overflow:
5229 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5230 break;
5231 case Builtin::BI__builtin_usub_overflow:
5232 case Builtin::BI__builtin_usubl_overflow:
5233 case Builtin::BI__builtin_usubll_overflow:
5234 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5235 break;
5236 case Builtin::BI__builtin_umul_overflow:
5237 case Builtin::BI__builtin_umull_overflow:
5238 case Builtin::BI__builtin_umulll_overflow:
5239 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5240 break;
5241 case Builtin::BI__builtin_sadd_overflow:
5242 case Builtin::BI__builtin_saddl_overflow:
5243 case Builtin::BI__builtin_saddll_overflow:
5244 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5245 break;
5246 case Builtin::BI__builtin_ssub_overflow:
5247 case Builtin::BI__builtin_ssubl_overflow:
5248 case Builtin::BI__builtin_ssubll_overflow:
5249 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5250 break;
5251 case Builtin::BI__builtin_smul_overflow:
5252 case Builtin::BI__builtin_smull_overflow:
5253 case Builtin::BI__builtin_smulll_overflow:
5254 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5255 break;
5256 }
5257
5258
5259 llvm::Value *Carry;
5260 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5261 Builder.CreateStore(Sum, SumOutPtr);
5262
5263 return RValue::get(Carry);
5264 }
5265 case Builtin::BIaddressof:
5266 case Builtin::BI__addressof:
5267 case Builtin::BI__builtin_addressof:
5268 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5269 case Builtin::BI__builtin_function_start:
5270 return RValue::get(CGM.GetFunctionStart(
5271 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5272 case Builtin::BI__builtin_operator_new:
5273 return EmitBuiltinNewDeleteCall(
5274 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5275 case Builtin::BI__builtin_operator_delete:
5276 EmitBuiltinNewDeleteCall(
5277 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5278 return RValue::get(nullptr);
5279
5280 case Builtin::BI__builtin_is_aligned:
5281 return EmitBuiltinIsAligned(E);
5282 case Builtin::BI__builtin_align_up:
5283 return EmitBuiltinAlignTo(E, true);
5284 case Builtin::BI__builtin_align_down:
5285 return EmitBuiltinAlignTo(E, false);
5286
5287 case Builtin::BI__noop:
5288 // __noop always evaluates to an integer literal zero.
5289 return RValue::get(ConstantInt::get(IntTy, 0));
5290 case Builtin::BI__builtin_call_with_static_chain: {
5291 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5292 const Expr *Chain = E->getArg(1);
5293 return EmitCall(Call->getCallee()->getType(),
5294 EmitCallee(Call->getCallee()), Call, ReturnValue,
5295 EmitScalarExpr(Chain));
5296 }
5297 case Builtin::BI_InterlockedExchange8:
5298 case Builtin::BI_InterlockedExchange16:
5299 case Builtin::BI_InterlockedExchange:
5300 case Builtin::BI_InterlockedExchangePointer:
5301 return RValue::get(
5302 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5303 case Builtin::BI_InterlockedCompareExchangePointer:
5304 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5305 llvm::Type *RTy;
5306 llvm::IntegerType *IntType = IntegerType::get(
5307 getLLVMContext(), getContext().getTypeSize(E->getType()));
5308
5309 Address DestAddr = CheckAtomicAlignment(*this, E);
5310
5311 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5312 RTy = Exchange->getType();
5313 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5314
5315 llvm::Value *Comparand =
5316 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5317
5318 auto Ordering =
5319 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5320 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5321
5322 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5323 Ordering, Ordering);
5324 Result->setVolatile(true);
5325
5326 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5327 0),
5328 RTy));
5329 }
5330 case Builtin::BI_InterlockedCompareExchange8:
5331 case Builtin::BI_InterlockedCompareExchange16:
5332 case Builtin::BI_InterlockedCompareExchange:
5333 case Builtin::BI_InterlockedCompareExchange64:
5334 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5335 case Builtin::BI_InterlockedIncrement16:
5336 case Builtin::BI_InterlockedIncrement:
5337 return RValue::get(
5338 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5339 case Builtin::BI_InterlockedDecrement16:
5340 case Builtin::BI_InterlockedDecrement:
5341 return RValue::get(
5342 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5343 case Builtin::BI_InterlockedAnd8:
5344 case Builtin::BI_InterlockedAnd16:
5345 case Builtin::BI_InterlockedAnd:
5346 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5347 case Builtin::BI_InterlockedExchangeAdd8:
5348 case Builtin::BI_InterlockedExchangeAdd16:
5349 case Builtin::BI_InterlockedExchangeAdd:
5350 return RValue::get(
5351 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5352 case Builtin::BI_InterlockedExchangeSub8:
5353 case Builtin::BI_InterlockedExchangeSub16:
5354 case Builtin::BI_InterlockedExchangeSub:
5355 return RValue::get(
5356 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5357 case Builtin::BI_InterlockedOr8:
5358 case Builtin::BI_InterlockedOr16:
5359 case Builtin::BI_InterlockedOr:
5360 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5361 case Builtin::BI_InterlockedXor8:
5362 case Builtin::BI_InterlockedXor16:
5363 case Builtin::BI_InterlockedXor:
5364 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5365
5366 case Builtin::BI_bittest64:
5367 case Builtin::BI_bittest:
5368 case Builtin::BI_bittestandcomplement64:
5369 case Builtin::BI_bittestandcomplement:
5370 case Builtin::BI_bittestandreset64:
5371 case Builtin::BI_bittestandreset:
5372 case Builtin::BI_bittestandset64:
5373 case Builtin::BI_bittestandset:
5374 case Builtin::BI_interlockedbittestandreset:
5375 case Builtin::BI_interlockedbittestandreset64:
5376 case Builtin::BI_interlockedbittestandset64:
5377 case Builtin::BI_interlockedbittestandset:
5378 case Builtin::BI_interlockedbittestandset_acq:
5379 case Builtin::BI_interlockedbittestandset_rel:
5380 case Builtin::BI_interlockedbittestandset_nf:
5381 case Builtin::BI_interlockedbittestandreset_acq:
5382 case Builtin::BI_interlockedbittestandreset_rel:
5383 case Builtin::BI_interlockedbittestandreset_nf:
5384 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5385
5386 // These builtins exist to emit regular volatile loads and stores not
5387 // affected by the -fms-volatile setting.
5388 case Builtin::BI__iso_volatile_load8:
5389 case Builtin::BI__iso_volatile_load16:
5390 case Builtin::BI__iso_volatile_load32:
5391 case Builtin::BI__iso_volatile_load64:
5392 return RValue::get(EmitISOVolatileLoad(*this, E));
5393 case Builtin::BI__iso_volatile_store8:
5394 case Builtin::BI__iso_volatile_store16:
5395 case Builtin::BI__iso_volatile_store32:
5396 case Builtin::BI__iso_volatile_store64:
5397 return RValue::get(EmitISOVolatileStore(*this, E));
5398
5399 case Builtin::BI__builtin_ptrauth_sign_constant:
5400 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5401
5402 case Builtin::BI__builtin_ptrauth_auth:
5403 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5404 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5405 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5406 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5407 case Builtin::BI__builtin_ptrauth_strip: {
5408 // Emit the arguments.
5409 SmallVector<llvm::Value *, 5> Args;
5410 for (auto argExpr : E->arguments())
5411 Args.push_back(EmitScalarExpr(argExpr));
5412
5413 // Cast the value to intptr_t, saving its original type.
5414 llvm::Type *OrigValueType = Args[0]->getType();
5415 if (OrigValueType->isPointerTy())
5416 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5417
5418 switch (BuiltinID) {
5419 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5420 if (Args[4]->getType()->isPointerTy())
5421 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5422 [[fallthrough]];
5423
5424 case Builtin::BI__builtin_ptrauth_auth:
5425 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5426 if (Args[2]->getType()->isPointerTy())
5427 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5428 break;
5429
5430 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5431 if (Args[1]->getType()->isPointerTy())
5432 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5433 break;
5434
5435 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5436 case Builtin::BI__builtin_ptrauth_strip:
5437 break;
5438 }
5439
5440 // Call the intrinsic.
5441 auto IntrinsicID = [&]() -> unsigned {
5442 switch (BuiltinID) {
5443 case Builtin::BI__builtin_ptrauth_auth:
5444 return llvm::Intrinsic::ptrauth_auth;
5445 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5446 return llvm::Intrinsic::ptrauth_resign;
5447 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5448 return llvm::Intrinsic::ptrauth_blend;
5449 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5450 return llvm::Intrinsic::ptrauth_sign_generic;
5451 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5452 return llvm::Intrinsic::ptrauth_sign;
5453 case Builtin::BI__builtin_ptrauth_strip:
5454 return llvm::Intrinsic::ptrauth_strip;
5455 }
5456 llvm_unreachable("bad ptrauth intrinsic");
5457 }();
5458 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5459 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5460
5461 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5462 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5463 OrigValueType->isPointerTy()) {
5464 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5465 }
5466 return RValue::get(Result);
5467 }
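// Illustrative sketch (not part of the source): each of these builtins maps
// onto the corresponding llvm.ptrauth.* intrinsic operating on i64, roughly
//
//   void *stripped = __builtin_ptrauth_strip(fn, 0);
//   // -> ptrtoint, call i64 @llvm.ptrauth.strip(i64 %fn, i32 0), inttoptr
//
// with pointer-typed operands converted through intptr_t and the result
// converted back to the original pointer type where that makes sense.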
5468
5469 case Builtin::BI__exception_code:
5470 case Builtin::BI_exception_code:
5471 return RValue::get(EmitSEHExceptionCode());
5472 case Builtin::BI__exception_info:
5473 case Builtin::BI_exception_info:
5474 return RValue::get(EmitSEHExceptionInfo());
5475 case Builtin::BI__abnormal_termination:
5476 case Builtin::BI_abnormal_termination:
5477 return RValue::get(EmitSEHAbnormalTermination());
5478 case Builtin::BI_setjmpex:
5479 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5480 E->getArg(0)->getType()->isPointerType())
5481 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5482 break;
5483 case Builtin::BI_setjmp:
5484 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5485 E->getArg(0)->getType()->isPointerType()) {
5486 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5487 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5488 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5489 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5490 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5491 }
5492 break;
5493
5494 // C++ std:: builtins.
5495 case Builtin::BImove:
5496 case Builtin::BImove_if_noexcept:
5497 case Builtin::BIforward:
5498 case Builtin::BIforward_like:
5499 case Builtin::BIas_const:
5500 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5501 case Builtin::BI__GetExceptionInfo: {
5502 if (llvm::GlobalVariable *GV =
5503 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5504 return RValue::get(GV);
5505 break;
5506 }
5507
5508 case Builtin::BI__fastfail:
5509 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5510
5511 case Builtin::BI__builtin_coro_id:
5512 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5513 case Builtin::BI__builtin_coro_promise:
5514 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5515 case Builtin::BI__builtin_coro_resume:
5516 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5517 return RValue::get(nullptr);
5518 case Builtin::BI__builtin_coro_frame:
5519 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5520 case Builtin::BI__builtin_coro_noop:
5521 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5522 case Builtin::BI__builtin_coro_free:
5523 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5524 case Builtin::BI__builtin_coro_destroy:
5525 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5526 return RValue::get(nullptr);
5527 case Builtin::BI__builtin_coro_done:
5528 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5529 case Builtin::BI__builtin_coro_alloc:
5530 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5531 case Builtin::BI__builtin_coro_begin:
5532 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5533 case Builtin::BI__builtin_coro_end:
5534 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5535 case Builtin::BI__builtin_coro_suspend:
5536 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5537 case Builtin::BI__builtin_coro_size:
5538 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5539 case Builtin::BI__builtin_coro_align:
5540 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5541
5542 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5543 case Builtin::BIread_pipe:
5544 case Builtin::BIwrite_pipe: {
5545 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5546 *Arg1 = EmitScalarExpr(E->getArg(1));
5547 CGOpenCLRuntime OpenCLRT(CGM);
5548 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5549 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5550
5551 // Type of the generic packet parameter.
5552 unsigned GenericAS =
5553 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5554 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5555
5556 // Testing which overloaded version we should generate the call for.
5557 if (2U == E->getNumArgs()) {
5558 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5559 : "__write_pipe_2";
5560 // Creating a generic function type to be able to call with any builtin or
5561 // user defined type.
5562 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5563 llvm::FunctionType *FTy = llvm::FunctionType::get(
5564 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5565 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5566 return RValue::get(
5567 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5568 {Arg0, BCast, PacketSize, PacketAlign}));
5569 } else {
5570 assert(4 == E->getNumArgs() &&
5571 "Illegal number of parameters to pipe function");
5572 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5573 : "__write_pipe_4";
5574
5575 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5576 Int32Ty, Int32Ty};
5577 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5578 *Arg3 = EmitScalarExpr(E->getArg(3));
5579 llvm::FunctionType *FTy = llvm::FunctionType::get(
5580 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5581 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5582 // We know the third argument is an integer type, but we may need to cast
5583 // it to i32.
5584 if (Arg2->getType() != Int32Ty)
5585 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5586 return RValue::get(
5587 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5588 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5589 }
5590 }
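// Illustrative sketch (not part of the source): in OpenCL code such as
//
//   read_pipe(p, &val);             // 2-arg form -> __read_pipe_2(...)
//   read_pipe(p, rid, idx, &val);   // 4-arg form -> __read_pipe_4(...)
//
// the builtin is rewritten into a call to the matching runtime entry point,
// with the packet size and alignment of the pipe element type appended as
// trailing i32 arguments.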
5591 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
5592 // functions
5593 case Builtin::BIreserve_read_pipe:
5594 case Builtin::BIreserve_write_pipe:
5595 case Builtin::BIwork_group_reserve_read_pipe:
5596 case Builtin::BIwork_group_reserve_write_pipe:
5597 case Builtin::BIsub_group_reserve_read_pipe:
5598 case Builtin::BIsub_group_reserve_write_pipe: {
5599 // Composing the mangled name for the function.
5600 const char *Name;
5601 if (BuiltinID == Builtin::BIreserve_read_pipe)
5602 Name = "__reserve_read_pipe";
5603 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5604 Name = "__reserve_write_pipe";
5605 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5606 Name = "__work_group_reserve_read_pipe";
5607 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5608 Name = "__work_group_reserve_write_pipe";
5609 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5610 Name = "__sub_group_reserve_read_pipe";
5611 else
5612 Name = "__sub_group_reserve_write_pipe";
5613
5614 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5615 *Arg1 = EmitScalarExpr(E->getArg(1));
5616 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5617 CGOpenCLRuntime OpenCLRT(CGM);
5618 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5619 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5620
5621 // Building the generic function prototype.
5622 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5623 llvm::FunctionType *FTy = llvm::FunctionType::get(
5624 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5625 // We know the second argument is an integer type, but we may need to cast
5626 // it to i32.
5627 if (Arg1->getType() != Int32Ty)
5628 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5629 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5630 {Arg0, Arg1, PacketSize, PacketAlign}));
5631 }
5632 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5633 // functions
5634 case Builtin::BIcommit_read_pipe:
5635 case Builtin::BIcommit_write_pipe:
5636 case Builtin::BIwork_group_commit_read_pipe:
5637 case Builtin::BIwork_group_commit_write_pipe:
5638 case Builtin::BIsub_group_commit_read_pipe:
5639 case Builtin::BIsub_group_commit_write_pipe: {
5640 const char *Name;
5641 if (BuiltinID == Builtin::BIcommit_read_pipe)
5642 Name = "__commit_read_pipe";
5643 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5644 Name = "__commit_write_pipe";
5645 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5646 Name = "__work_group_commit_read_pipe";
5647 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5648 Name = "__work_group_commit_write_pipe";
5649 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5650 Name = "__sub_group_commit_read_pipe";
5651 else
5652 Name = "__sub_group_commit_write_pipe";
5653
5654 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5655 *Arg1 = EmitScalarExpr(E->getArg(1));
5656 CGOpenCLRuntime OpenCLRT(CGM);
5657 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5658 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5659
5660 // Building the generic function prototype.
5661 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5662 llvm::FunctionType *FTy =
5663 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5664 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5665
5666 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5667 {Arg0, Arg1, PacketSize, PacketAlign}));
5668 }
5669 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5670 case Builtin::BIget_pipe_num_packets:
5671 case Builtin::BIget_pipe_max_packets: {
5672 const char *BaseName;
5673 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5674 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5675 BaseName = "__get_pipe_num_packets";
5676 else
5677 BaseName = "__get_pipe_max_packets";
5678 std::string Name = std::string(BaseName) +
5679 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5680
5681 // Building the generic function prototype.
5682 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5683 CGOpenCLRuntime OpenCLRT(CGM);
5684 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5685 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5686 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5687 llvm::FunctionType *FTy = llvm::FunctionType::get(
5688 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5689
5690 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5691 {Arg0, PacketSize, PacketAlign}));
5692 }
5693
5694 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5695 case Builtin::BIto_global:
5696 case Builtin::BIto_local:
5697 case Builtin::BIto_private: {
5698 auto Arg0 = EmitScalarExpr(E->getArg(0));
5699 auto NewArgT = llvm::PointerType::get(
5700 getLLVMContext(),
5701 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5702 auto NewRetT = llvm::PointerType::get(
5703 getLLVMContext(),
5704 CGM.getContext().getTargetAddressSpace(
5705 E->getType()->getPointeeType().getAddressSpace()));
5706 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5707 llvm::Value *NewArg;
5708 if (Arg0->getType()->getPointerAddressSpace() !=
5709 NewArgT->getPointerAddressSpace())
5710 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5711 else
5712 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5713 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5714 auto NewCall =
5715 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5716 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5717 ConvertType(E->getType())));
5718 }
5719
5720 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5721 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5722 // The code below expands the builtin call to a call to one of the following
5723 // functions that an OpenCL runtime library will have to provide:
5724 // __enqueue_kernel_basic
5725 // __enqueue_kernel_varargs
5726 // __enqueue_kernel_basic_events
5727 // __enqueue_kernel_events_varargs
5728 case Builtin::BIenqueue_kernel: {
5729 StringRef Name; // Generated function call name
5730 unsigned NumArgs = E->getNumArgs();
5731
5732 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5733 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5734 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5735
5736 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5737 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5738 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5739 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
5740 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
5741
5742 if (NumArgs == 4) {
5743 // The most basic form of the call with parameters:
5744 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5745 Name = "__enqueue_kernel_basic";
5746 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5747 GenericVoidPtrTy};
5748 llvm::FunctionType *FTy = llvm::FunctionType::get(
5749 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5750
5751 auto Info =
5752 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5753 llvm::Value *Kernel =
5754 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5755 llvm::Value *Block =
5756 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5757
5758 AttrBuilder B(Builder.getContext());
5759 B.addByValAttr(NDRangeL.getAddress().getElementType());
5760 llvm::AttributeList ByValAttrSet =
5761 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5762
5763 auto RTCall =
5764 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5765 {Queue, Flags, Range, Kernel, Block});
5766 RTCall->setAttributes(ByValAttrSet);
5767 return RValue::get(RTCall);
5768 }
5769 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5770
5771 // Create a temporary array to hold the sizes of local pointer arguments
5772 // for the block. \p First is the position of the first size argument.
5773 auto CreateArrayForSizeVar = [=](unsigned First)
5774 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5775 llvm::APInt ArraySize(32, NumArgs - First);
5776 QualType SizeArrayTy = getContext().getConstantArrayType(
5777 getContext().getSizeType(), ArraySize, nullptr,
5778 ArraySizeModifier::Normal,
5779 /*IndexTypeQuals=*/0);
5780 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5781 llvm::Value *TmpPtr = Tmp.getPointer();
5782 llvm::Value *TmpSize = EmitLifetimeStart(
5783 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5784 llvm::Value *ElemPtr;
5785 // Each of the following arguments specifies the size of the corresponding
5786 // argument passed to the enqueued block.
5787 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5788 for (unsigned I = First; I < NumArgs; ++I) {
5789 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5790 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5791 {Zero, Index});
5792 if (I == First)
5793 ElemPtr = GEP;
5794 auto *V =
5795 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5796 Builder.CreateAlignedStore(
5797 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5798 }
5799 return std::tie(ElemPtr, TmpSize, TmpPtr);
5800 };
5801
5802 // Could have events and/or varargs.
5803 if (E->getArg(3)->getType()->isBlockPointerType()) {
5804 // No events passed, but has variadic arguments.
5805 Name = "__enqueue_kernel_varargs";
5806 auto Info =
5807 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5808 llvm::Value *Kernel =
5809 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5810 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5811 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5812 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5813
5814 // Create a vector of the arguments, as well as a constant value to
5815 // express to the runtime the number of variadic arguments.
5816 llvm::Value *const Args[] = {Queue, Flags,
5817 Range, Kernel,
5818 Block, ConstantInt::get(IntTy, NumArgs - 4),
5819 ElemPtr};
5820 llvm::Type *const ArgTys[] = {
5821 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5822 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5823
5824 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5825 auto Call = RValue::get(
5826 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5827 if (TmpSize)
5828 EmitLifetimeEnd(TmpSize, TmpPtr);
5829 return Call;
5830 }
5831 // Any calls now have event arguments passed.
5832 if (NumArgs >= 7) {
5833 llvm::PointerType *PtrTy = llvm::PointerType::get(
5834 getLLVMContext(),
5835 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5836
5837 llvm::Value *NumEvents =
5838 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5839
5840 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
5841 // to be a null pointer constant (including `0` literal), we can take it
5842 // into account and emit null pointer directly.
5843 llvm::Value *EventWaitList = nullptr;
5844 if (E->getArg(4)->isNullPointerConstant(
5845 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5846 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5847 } else {
5848 EventWaitList =
5849 E->getArg(4)->getType()->isArrayType()
5850 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
5851 : EmitScalarExpr(E->getArg(4));
5852 // Convert to generic address space.
5853 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5854 }
5855 llvm::Value *EventRet = nullptr;
5856 if (E->getArg(5)->isNullPointerConstant(
5857 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5858 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5859 } else {
5860 EventRet =
5861 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5862 }
5863
5864 auto Info =
5865 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5866 llvm::Value *Kernel =
5867 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5868 llvm::Value *Block =
5869 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5870
5871 std::vector<llvm::Type *> ArgTys = {
5872 QueueTy, Int32Ty, RangeTy, Int32Ty,
5873 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5874
5875 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5876 NumEvents, EventWaitList, EventRet,
5877 Kernel, Block};
5878
5879 if (NumArgs == 7) {
5880 // Has events but no variadics.
5881 Name = "__enqueue_kernel_basic_events";
5882 llvm::FunctionType *FTy = llvm::FunctionType::get(
5883 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5884 return RValue::get(
5885 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5886 llvm::ArrayRef<llvm::Value *>(Args)));
5887 }
5888 // Has event info and variadics
5889 // Pass the number of variadics to the runtime function too.
5890 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5891 ArgTys.push_back(Int32Ty);
5892 Name = "__enqueue_kernel_events_varargs";
5893
5894 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5895 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5896 Args.push_back(ElemPtr);
5897 ArgTys.push_back(ElemPtr->getType());
5898
5899 llvm::FunctionType *FTy = llvm::FunctionType::get(
5900 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5901 auto Call =
5902 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5903 llvm::ArrayRef<llvm::Value *>(Args)));
5904 if (TmpSize)
5905 EmitLifetimeEnd(TmpSize, TmpPtr);
5906 return Call;
5907 }
5908 llvm_unreachable("Unexpected enqueue_kernel signature");
5909 }
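// Illustrative sketch (hypothetical source, not from this file): a call that
// passes events and local-pointer sizes,
//   clk_event_t wait[2]; clk_event_t out;
//   enqueue_kernel(q, flags, nd, 2, wait, &out, ^(local int *p){ ... }, 64);
// takes the NumArgs >= 7 path above and is lowered to
// __enqueue_kernel_events_varargs, with the size array built by
// CreateArrayForSizeVar(7); without the trailing size it would use
// __enqueue_kernel_basic_events instead.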
5910 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5911 // parameter.
5912 case Builtin::BIget_kernel_work_group_size: {
5913 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5914 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5915 auto Info =
5916 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5917 Value *Kernel =
5918 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5919 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5920 return RValue::get(EmitRuntimeCall(
5921 CGM.CreateRuntimeFunction(
5922 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5923 false),
5924 "__get_kernel_work_group_size_impl"),
5925 {Kernel, Arg}));
5926 }
5927 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5928 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5929 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5930 auto Info =
5931 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5932 Value *Kernel =
5933 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5934 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5935 return RValue::get(EmitRuntimeCall(
5936 CGM.CreateRuntimeFunction(
5937 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5938 false),
5939 "__get_kernel_preferred_work_group_size_multiple_impl"),
5940 {Kernel, Arg}));
5941 }
5942 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5943 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5944 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5945 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5946 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5947 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
5948 auto Info =
5949 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5950 Value *Kernel =
5951 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5952 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5953 const char *Name =
5954 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5955 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5956 : "__get_kernel_sub_group_count_for_ndrange_impl";
5957 return RValue::get(EmitRuntimeCall(
5958 CGM.CreateRuntimeFunction(
5959 llvm::FunctionType::get(
5960 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5961 false),
5962 Name),
5963 {NDRange, Kernel, Block}));
5964 }
5965 case Builtin::BI__builtin_store_half:
5966 case Builtin::BI__builtin_store_halff: {
5967 Value *Val = EmitScalarExpr(E->getArg(0));
5968 Address Address = EmitPointerWithAlignment(E->getArg(1));
5969 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5970 Builder.CreateStore(HalfVal, Address);
5971 return RValue::get(nullptr);
5972 }
5973 case Builtin::BI__builtin_load_half: {
5974 Address Address = EmitPointerWithAlignment(E->getArg(0));
5975 Value *HalfVal = Builder.CreateLoad(Address);
5976 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5977 }
5978 case Builtin::BI__builtin_load_halff: {
5979 Address Address = EmitPointerWithAlignment(E->getArg(0));
5980 Value *HalfVal = Builder.CreateLoad(Address);
5981 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5982 }
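// Illustrative sketch (assumed IR shape, shown for clarity): __builtin_store_halff
// truncates a float to half and stores it, and __builtin_load_halff widens it back:
//   %h = fptrunc float %val to half
//   store half %h, ptr %addr
//   ...
//   %h2 = load half, ptr %addr
//   %f  = fpext half %h2 to float
// __builtin_store_half / __builtin_load_half do the same with double as the
// wide type.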
5983 case Builtin::BI__builtin_printf:
5984 case Builtin::BIprintf:
5985 if (getTarget().getTriple().isNVPTX() ||
5986 getTarget().getTriple().isAMDGCN() ||
5987 (getTarget().getTriple().isSPIRV() &&
5988 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
5989 if (getLangOpts().OpenMPIsTargetDevice)
5990 return EmitOpenMPDevicePrintfCallExpr(E);
5991 if (getTarget().getTriple().isNVPTX())
5992 return EmitNVPTXDevicePrintfCallExpr(E);
5993 if ((getTarget().getTriple().isAMDGCN() ||
5994 getTarget().getTriple().isSPIRV()) &&
5995 getLangOpts().HIP)
5996 return EmitAMDGPUDevicePrintfCallExpr(E);
5997 }
5998
5999 break;
6000 case Builtin::BI__builtin_canonicalize:
6001 case Builtin::BI__builtin_canonicalizef:
6002 case Builtin::BI__builtin_canonicalizef16:
6003 case Builtin::BI__builtin_canonicalizel:
6004 return RValue::get(
6005 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6006
6007 case Builtin::BI__builtin_thread_pointer: {
6008 if (!getContext().getTargetInfo().isTLSSupported())
6009 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6010 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6011 break;
6012 }
6013 case Builtin::BI__builtin_os_log_format:
6014 return emitBuiltinOSLogFormat(*E);
6015
6016 case Builtin::BI__xray_customevent: {
6017 if (!ShouldXRayInstrumentFunction())
6018 return RValue::getIgnored();
6019
6020 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6021 XRayInstrKind::Custom))
6022 return RValue::getIgnored();
6023
6024 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6025 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6026 return RValue::getIgnored();
6027
6028 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6029 auto FTy = F->getFunctionType();
6030 auto Arg0 = E->getArg(0);
6031 auto Arg0Val = EmitScalarExpr(Arg0);
6032 auto Arg0Ty = Arg0->getType();
6033 auto PTy0 = FTy->getParamType(0);
6034 if (PTy0 != Arg0Val->getType()) {
6035 if (Arg0Ty->isArrayType())
6036 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6037 else
6038 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6039 }
6040 auto Arg1 = EmitScalarExpr(E->getArg(1));
6041 auto PTy1 = FTy->getParamType(1);
6042 if (PTy1 != Arg1->getType())
6043 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6044 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6045 }
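// Illustrative sketch (hypothetical C usage, not from this file):
//   char buf[32];
//   __xray_customevent(buf, sizeof buf);
// lowers to a call to the llvm.xray.customevent intrinsic; the array argument
// decays to a pointer and the size operand is truncated or bitcast to the
// intrinsic's parameter type, as done above. The call is dropped entirely when
// the enclosing function is not XRay-instrumented.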
6046
6047 case Builtin::BI__xray_typedevent: {
6048 // TODO: There should be a way to always emit events even if the current
6049 // function is not instrumented. Losing events in a stream can cripple
6050 // a trace.
6051 if (!ShouldXRayInstrumentFunction())
6052 return RValue::getIgnored();
6053
6054 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6055 XRayInstrKind::Typed))
6056 return RValue::getIgnored();
6057
6058 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6059 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6060 return RValue::getIgnored();
6061
6062 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6063 auto FTy = F->getFunctionType();
6064 auto Arg0 = EmitScalarExpr(E->getArg(0));
6065 auto PTy0 = FTy->getParamType(0);
6066 if (PTy0 != Arg0->getType())
6067 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6068 auto Arg1 = E->getArg(1);
6069 auto Arg1Val = EmitScalarExpr(Arg1);
6070 auto Arg1Ty = Arg1->getType();
6071 auto PTy1 = FTy->getParamType(1);
6072 if (PTy1 != Arg1Val->getType()) {
6073 if (Arg1Ty->isArrayType())
6074 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6075 else
6076 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6077 }
6078 auto Arg2 = EmitScalarExpr(E->getArg(2));
6079 auto PTy2 = FTy->getParamType(2);
6080 if (PTy2 != Arg2->getType())
6081 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6082 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6083 }
6084
6085 case Builtin::BI__builtin_ms_va_start:
6086 case Builtin::BI__builtin_ms_va_end:
6087 return RValue::get(
6088 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6089 BuiltinID == Builtin::BI__builtin_ms_va_start));
6090
6091 case Builtin::BI__builtin_ms_va_copy: {
6092 // Lower this manually. We can't reliably determine whether or not any
6093 // given va_copy() is for a Win64 va_list from the calling convention
6094 // alone, because it's legal to do this from a System V ABI function.
6095 // With opaque pointer types, we won't have enough information in LLVM
6096 // IR to determine this from the argument types, either. Best to do it
6097 // now, while we have enough information.
6098 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6099 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6100
6101 DestAddr = DestAddr.withElementType(Int8PtrTy);
6102 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6103
6104 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6105 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6106 }
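// Illustrative sketch (assumed IR shape): because a Win64 va_list is a single
// pointer into the argument save area, __builtin_ms_va_copy(dst, src) reduces to
//   %ap.val = load ptr, ptr %src
//   store ptr %ap.val, ptr %dst
// i.e. one pointer-sized load and store, emitted directly here rather than
// deferred to a target hook.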
6107
6108 case Builtin::BI__builtin_get_device_side_mangled_name: {
6109 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6110 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6111 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6112 return RValue::get(Str.getPointer());
6113 }
6114 }
6115
6116 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6117 // the call using the normal call path, but using the unmangled
6118 // version of the function name.
6119 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6120 return emitLibraryCall(*this, FD, E,
6121 CGM.getBuiltinLibFunction(FD, BuiltinID));
6122
6123 // If this is a predefined lib function (e.g. malloc), emit the call
6124 // using exactly the normal call path.
6125 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6126 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6127
6128 // Check that a call to a target specific builtin has the correct target
6129 // features.
6130 // This is down here to avoid non-target specific builtins, however, if
6131 // generic builtins start to require generic target features then we
6132 // can move this up to the beginning of the function.
6133 checkTargetFeatures(E, FD);
6134
6135 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6136 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6137
6138 // See if we have a target specific intrinsic.
6139 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6140 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6141 StringRef Prefix =
6142 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6143 if (!Prefix.empty()) {
6144 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6145 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6146 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6147 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6148 // NOTE we don't need to perform a compatibility flag check here since the
6149 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
6150 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
6151 if (IntrinsicID == Intrinsic::not_intrinsic)
6152 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6153 }
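// Illustrative note (hypothetical values, shown for clarity): when targeting
// x86_64, Prefix is "x86", so a builtin whose definition carries a matching
// ClangBuiltin mapping resolves here to an llvm::Intrinsic ID and is emitted by
// the generic loop below; only builtins without such a mapping fall through to
// EmitTargetBuiltinExpr further down.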
6154
6155 if (IntrinsicID != Intrinsic::not_intrinsic) {
6156 SmallVector<Value *, 16> Args;
6157
6158 // Find out if any arguments are required to be integer constant
6159 // expressions.
6160 unsigned ICEArguments = 0;
6161 ASTContext::GetBuiltinTypeError Error;
6162 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6163 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6164
6165 Function *F = CGM.getIntrinsic(IntrinsicID);
6166 llvm::FunctionType *FTy = F->getFunctionType();
6167
6168 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6169 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6170 // If the intrinsic arg type is different from the builtin arg type
6171 // we need to do a bit cast.
6172 llvm::Type *PTy = FTy->getParamType(i);
6173 if (PTy != ArgValue->getType()) {
6174 // XXX - vector of pointers?
6175 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6176 if (PtrTy->getAddressSpace() !=
6177 ArgValue->getType()->getPointerAddressSpace()) {
6178 ArgValue = Builder.CreateAddrSpaceCast(
6179 ArgValue, llvm::PointerType::get(getLLVMContext(),
6180 PtrTy->getAddressSpace()));
6181 }
6182 }
6183
6184 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6185 // in amx intrinsics.
6186 if (PTy->isX86_AMXTy())
6187 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6188 {ArgValue->getType()}, {ArgValue});
6189 else
6190 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6191 }
6192
6193 Args.push_back(ArgValue);
6194 }
6195
6196 Value *V = Builder.CreateCall(F, Args);
6197 QualType BuiltinRetType = E->getType();
6198
6199 llvm::Type *RetTy = VoidTy;
6200 if (!BuiltinRetType->isVoidType())
6201 RetTy = ConvertType(BuiltinRetType);
6202
6203 if (RetTy != V->getType()) {
6204 // XXX - vector of pointers?
6205 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6206 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6207 V = Builder.CreateAddrSpaceCast(
6208 V, llvm::PointerType::get(getLLVMContext(),
6209 PtrTy->getAddressSpace()));
6210 }
6211 }
6212
6213 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6214 // in amx intrinsics.
6215 if (V->getType()->isX86_AMXTy())
6216 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6217 {V});
6218 else
6219 V = Builder.CreateBitCast(V, RetTy);
6220 }
6221
6222 if (RetTy->isVoidTy())
6223 return RValue::get(nullptr);
6224
6225 return RValue::get(V);
6226 }
6227
6228 // Some target-specific builtins can have aggregate return values, e.g.
6229 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6230 // ReturnValue to be non-null, so that the target-specific emission code can
6231 // always just emit into it.
6232 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6233 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6234 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6235 ReturnValue = ReturnValueSlot(DestPtr, false);
6236 }
6237
6238 // Now see if we can emit a target-specific builtin.
6239 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6240 switch (EvalKind) {
6241 case TEK_Scalar:
6242 if (V->getType()->isVoidTy())
6243 return RValue::get(nullptr);
6244 return RValue::get(V);
6245 case TEK_Aggregate:
6246 return RValue::getAggregate(ReturnValue.getAddress(),
6247 ReturnValue.isVolatile());
6248 case TEK_Complex:
6249 llvm_unreachable("No current target builtin returns complex");
6250 }
6251 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6252 }
6253
6254 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6255 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6256 return RValue::get(V);
6257
6258 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6259 return EmitHipStdParUnsupportedBuiltin(this, FD);
6260
6261 ErrorUnsupported(E, "builtin function");
6262
6263 // Unknown builtin, for now just dump it out and return undef.
6264 return GetUndefRValue(E->getType());
6265}
6266
6267 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6268 unsigned BuiltinID, const CallExpr *E,
6269 ReturnValueSlot ReturnValue,
6270 llvm::Triple::ArchType Arch) {
6271 // When compiling in HipStdPar mode we have to be conservative in rejecting
6272 // target specific features in the FE, and defer the possible error to the
6273 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6274 // referenced by an accelerator executable function, we emit an error.
6275 // Returning nullptr here leads to the builtin being handled in
6276 // EmitStdParUnsupportedBuiltin.
6277 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6278 Arch != CGF->getTarget().getTriple().getArch())
6279 return nullptr;
6280
6281 switch (Arch) {
6282 case llvm::Triple::arm:
6283 case llvm::Triple::armeb:
6284 case llvm::Triple::thumb:
6285 case llvm::Triple::thumbeb:
6286 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6287 case llvm::Triple::aarch64:
6288 case llvm::Triple::aarch64_32:
6289 case llvm::Triple::aarch64_be:
6290 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6291 case llvm::Triple::bpfeb:
6292 case llvm::Triple::bpfel:
6293 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6294 case llvm::Triple::x86:
6295 case llvm::Triple::x86_64:
6296 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6297 case llvm::Triple::ppc:
6298 case llvm::Triple::ppcle:
6299 case llvm::Triple::ppc64:
6300 case llvm::Triple::ppc64le:
6301 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6302 case llvm::Triple::r600:
6303 case llvm::Triple::amdgcn:
6304 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6305 case llvm::Triple::systemz:
6306 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6307 case llvm::Triple::nvptx:
6308 case llvm::Triple::nvptx64:
6309 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6310 case llvm::Triple::wasm32:
6311 case llvm::Triple::wasm64:
6312 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6313 case llvm::Triple::hexagon:
6314 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6315 case llvm::Triple::riscv32:
6316 case llvm::Triple::riscv64:
6317 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6318 case llvm::Triple::spirv64:
6319 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6320 return nullptr;
6321 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6322 default:
6323 return nullptr;
6324 }
6325}
6326
6327 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6328 const CallExpr *E,
6329 ReturnValueSlot ReturnValue) {
6330 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6331 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6332 return EmitTargetArchBuiltinExpr(
6333 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6334 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6335 }
6336
6337 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6338 getTarget().getTriple().getArch());
6339}
6340
6341static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6342 NeonTypeFlags TypeFlags,
6343 bool HasLegalHalfType = true,
6344 bool V1Ty = false,
6345 bool AllowBFloatArgsAndRet = true) {
6346 int IsQuad = TypeFlags.isQuad();
6347 switch (TypeFlags.getEltType()) {
6348 case NeonTypeFlags::Int8:
6349 case NeonTypeFlags::Poly8:
6350 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6351 case NeonTypeFlags::Int16:
6352 case NeonTypeFlags::Poly16:
6353 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6354 case NeonTypeFlags::BFloat16:
6355 if (AllowBFloatArgsAndRet)
6356 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6357 else
6358 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6359 case NeonTypeFlags::Float16:
6360 if (HasLegalHalfType)
6361 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6362 else
6363 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6364 case NeonTypeFlags::Int32:
6365 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6366 case NeonTypeFlags::Int64:
6367 case NeonTypeFlags::Poly64:
6368 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6369 case NeonTypeFlags::Poly128:
6370 // FIXME: i128 and f128 don't get full support in Clang and LLVM.
6371 // There is a lot of i128 and f128 API missing,
6372 // so we use v16i8 to represent poly128 and get pattern matched.
6373 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6374 case NeonTypeFlags::Float32:
6375 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6376 case NeonTypeFlags::Float64:
6377 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6378 }
6379 llvm_unreachable("Unknown vector element type!");
6380}
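// Illustrative mapping (examples only, derived from the switch above):
//   Int8,  !quad -> <8 x i8>     Int8,  quad -> <16 x i8>
//   Int32, !quad -> <2 x i32>    Int32, quad -> <4 x i32>
//   Float64, quad -> <2 x double>; V1Ty forces a single-element vector.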
6381
6382static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6383 NeonTypeFlags IntTypeFlags) {
6384 int IsQuad = IntTypeFlags.isQuad();
6385 switch (IntTypeFlags.getEltType()) {
6386 case NeonTypeFlags::Int16:
6387 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6388 case NeonTypeFlags::Int32:
6389 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6390 case NeonTypeFlags::Int64:
6391 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6392 default:
6393 llvm_unreachable("Type can't be converted to floating-point!");
6394 }
6395}
6396
6397 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6398 const ElementCount &Count) {
6399 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6400 return Builder.CreateShuffleVector(V, V, SV, "lane");
6401}
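// Illustrative sketch (assumed IR shape): splatting lane 1 of a <4 x i32>
// value through this helper yields
//   %lane = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// i.e. the constant lane index is broadcast into the shuffle mask.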
6402
6403 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6404 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6405 return EmitNeonSplat(V, C, EC);
6406}
6407
6408 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6409 const char *name,
6410 unsigned shift, bool rightshift) {
6411 unsigned j = 0;
6412 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6413 ai != ae; ++ai, ++j) {
6414 if (F->isConstrainedFPIntrinsic())
6415 if (ai->getType()->isMetadataTy())
6416 continue;
6417 if (shift > 0 && shift == j)
6418 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6419 else
6420 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6421 }
6422
6423 if (F->isConstrainedFPIntrinsic())
6424 return Builder.CreateConstrainedFPCall(F, Ops, name);
6425 else
6426 return Builder.CreateCall(F, Ops, name);
6427}
6428
6429 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6430 bool neg) {
6431 int SV = cast<ConstantInt>(V)->getSExtValue();
6432 return ConstantInt::get(Ty, neg ? -SV : SV);
6433}
6434
6435// Right-shift a vector by a constant.
6436 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6437 llvm::Type *Ty, bool usgn,
6438 const char *name) {
6439 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6440
6441 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6442 int EltSize = VTy->getScalarSizeInBits();
6443
6444 Vec = Builder.CreateBitCast(Vec, Ty);
6445
6446 // lshr/ashr are undefined when the shift amount is equal to the vector
6447 // element size.
6448 if (ShiftAmt == EltSize) {
6449 if (usgn) {
6450 // Right-shifting an unsigned value by its size yields 0.
6451 return llvm::ConstantAggregateZero::get(VTy);
6452 } else {
6453 // Right-shifting a signed value by its size is equivalent
6454 // to a shift of size-1.
6455 --ShiftAmt;
6456 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6457 }
6458 }
6459
6460 Shift = EmitNeonShiftVector(Shift, Ty, false);
6461 if (usgn)
6462 return Builder.CreateLShr(Vec, Shift, name);
6463 else
6464 return Builder.CreateAShr(Vec, Shift, name);
6465}
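// Worked example (from the logic above): for <4 x i32> elements, a right shift
// by 32 is not representable as lshr/ashr, so an unsigned vshr_n by 32 folds to
// an all-zero vector, while a signed one is emitted as an ashr by 31, which
// preserves the "shift in sign bits" semantics of the NEON instruction.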
6466
6467enum {
6468 AddRetType = (1 << 0),
6469 Add1ArgType = (1 << 1),
6470 Add2ArgTypes = (1 << 2),
6471
6472 VectorizeRetType = (1 << 3),
6473 VectorizeArgTypes = (1 << 4),
6474
6475 InventFloatType = (1 << 5),
6476 UnsignedAlts = (1 << 6),
6477
6478 Use64BitVectors = (1 << 7),
6479 Use128BitVectors = (1 << 8),
6480
6481 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6482 VectorRet = AddRetType | VectorizeRetType,
6483 VectorRetGetArgs01 =
6484 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6485 FpCmpzModifiers =
6486 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6487 };
6488
6489namespace {
6490struct ARMVectorIntrinsicInfo {
6491 const char *NameHint;
6492 unsigned BuiltinID;
6493 unsigned LLVMIntrinsic;
6494 unsigned AltLLVMIntrinsic;
6496
6497 bool operator<(unsigned RHSBuiltinID) const {
6498 return BuiltinID < RHSBuiltinID;
6499 }
6500 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6501 return BuiltinID < TE.BuiltinID;
6502 }
6503};
6504} // end anonymous namespace
6505
6506#define NEONMAP0(NameBase) \
6507 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6508
6509#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6510 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6511 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6512
6513#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6514 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6515 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6516 TypeModifier }
6517
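// Illustrative expansion (for clarity): the vabd_v entry in the table below,
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
// expands to
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v,
//     Intrinsic::arm_neon_vabdu, Intrinsic::arm_neon_vabds,
//     Add1ArgType | UnsignedAlts }
// so a single table row can describe both the unsigned and signed forms of an
// intrinsic.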
6518static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6519 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6520 NEONMAP0(splat_lane_v),
6521 NEONMAP0(splat_laneq_v),
6522 NEONMAP0(splatq_lane_v),
6523 NEONMAP0(splatq_laneq_v),
6524 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6525 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6526 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6527 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6528 NEONMAP0(vadd_v),
6529 NEONMAP0(vaddhn_v),
6530 NEONMAP0(vaddq_v),
6531 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6532 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6533 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6534 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6535 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6536 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6537 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6538 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6539 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6540 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6541 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6542 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6543 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6544 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6545 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6546 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6547 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6548 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6549 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6550 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6551 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6552 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6553 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6554 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6555 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6556 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6557 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6558 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6559 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6560 NEONMAP0(vceqz_v),
6561 NEONMAP0(vceqzq_v),
6562 NEONMAP0(vcgez_v),
6563 NEONMAP0(vcgezq_v),
6564 NEONMAP0(vcgtz_v),
6565 NEONMAP0(vcgtzq_v),
6566 NEONMAP0(vclez_v),
6567 NEONMAP0(vclezq_v),
6568 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6569 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6570 NEONMAP0(vcltz_v),
6571 NEONMAP0(vcltzq_v),
6572 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6573 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6574 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6575 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6576 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6577 NEONMAP0(vcvt_f16_s16),
6578 NEONMAP0(vcvt_f16_u16),
6579 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6580 NEONMAP0(vcvt_f32_v),
6581 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6582 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6583 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6584 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6585 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6586 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6587 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6588 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6589 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6590 NEONMAP0(vcvt_s16_f16),
6591 NEONMAP0(vcvt_s32_v),
6592 NEONMAP0(vcvt_s64_v),
6593 NEONMAP0(vcvt_u16_f16),
6594 NEONMAP0(vcvt_u32_v),
6595 NEONMAP0(vcvt_u64_v),
6596 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6597 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6598 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6599 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6600 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6601 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6602 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6603 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6604 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6605 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6606 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6607 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6608 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6609 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6610 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6611 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6612 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6613 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6614 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6615 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6616 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6617 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6618 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6619 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6620 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6621 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6622 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6623 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6624 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6625 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6626 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6627 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6628 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6629 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6630 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6631 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6632 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6633 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6634 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6635 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6636 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6637 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6638 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6639 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6640 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6641 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6642 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6643 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6644 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6645 NEONMAP0(vcvtq_f16_s16),
6646 NEONMAP0(vcvtq_f16_u16),
6647 NEONMAP0(vcvtq_f32_v),
6648 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6649 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6650 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6651 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6652 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6653 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6654 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6655 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6656 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6657 NEONMAP0(vcvtq_s16_f16),
6658 NEONMAP0(vcvtq_s32_v),
6659 NEONMAP0(vcvtq_s64_v),
6660 NEONMAP0(vcvtq_u16_f16),
6661 NEONMAP0(vcvtq_u32_v),
6662 NEONMAP0(vcvtq_u64_v),
6663 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6664 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6665 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6666 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6667 NEONMAP0(vext_v),
6668 NEONMAP0(vextq_v),
6669 NEONMAP0(vfma_v),
6670 NEONMAP0(vfmaq_v),
6671 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6672 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6673 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6674 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6675 NEONMAP0(vld1_dup_v),
6676 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6677 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6678 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6679 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6680 NEONMAP0(vld1q_dup_v),
6681 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6682 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6683 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6684 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6685 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6686 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6687 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6688 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6689 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6690 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6691 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6692 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6693 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6694 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6695 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6696 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6697 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6698 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6699 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6700 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6701 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6702 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6703 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6704 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6705 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6706 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6707 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6708 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6709 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6710 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6711 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6712 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6713 NEONMAP0(vmovl_v),
6714 NEONMAP0(vmovn_v),
6715 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6716 NEONMAP0(vmull_v),
6717 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6718 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6719 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6720 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6721 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6722 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6723 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6724 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6725 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6726 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6727 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6728 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6729 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6730 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6731 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6732 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6733 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6734 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6735 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6736 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6737 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6738 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6739 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6740 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6741 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6742 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6743 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6744 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6745 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6746 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6747 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6748 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6749 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6750 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6751 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6752 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6753 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6754 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6755 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6756 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6757 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6758 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6759 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6760 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6761 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6762 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6763 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6764 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6765 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6766 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6767 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6768 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6769 NEONMAP0(vrndi_v),
6770 NEONMAP0(vrndiq_v),
6771 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6772 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6773 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6774 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6775 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6776 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6777 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6778 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6779 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6780 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6781 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6782 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6783 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6784 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6785 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6786 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6787 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6788 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6789 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6790 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6791 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6792 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6793 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6794 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6795 NEONMAP0(vshl_n_v),
6796 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6797 NEONMAP0(vshll_n_v),
6798 NEONMAP0(vshlq_n_v),
6799 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6800 NEONMAP0(vshr_n_v),
6801 NEONMAP0(vshrn_n_v),
6802 NEONMAP0(vshrq_n_v),
6803 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6804 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6805 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6806 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6807 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6808 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6809 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6810 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6811 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6812 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6813 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6814 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6815 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6816 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6817 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6818 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6819 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6820 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6821 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6822 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6823 NEONMAP0(vsubhn_v),
6824 NEONMAP0(vtrn_v),
6825 NEONMAP0(vtrnq_v),
6826 NEONMAP0(vtst_v),
6827 NEONMAP0(vtstq_v),
6828 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6829 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6830 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6831 NEONMAP0(vuzp_v),
6832 NEONMAP0(vuzpq_v),
6833 NEONMAP0(vzip_v),
6834 NEONMAP0(vzipq_v)
6835};
6836
6837static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6838 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6839 NEONMAP0(splat_lane_v),
6840 NEONMAP0(splat_laneq_v),
6841 NEONMAP0(splatq_lane_v),
6842 NEONMAP0(splatq_laneq_v),
6843 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6844 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6845 NEONMAP0(vadd_v),
6846 NEONMAP0(vaddhn_v),
6847 NEONMAP0(vaddq_p128),
6848 NEONMAP0(vaddq_v),
6849 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6850 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6851 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6852 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6853 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6854 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6855 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6856 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6857 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6858 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6859 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6860 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6861 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6862 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6863 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6864 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6865 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6866 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6867 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6868 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6869 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6870 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6871 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6872 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6873 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6874 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6875 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6876 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6877 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6878 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6879 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6880 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6881 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6882 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6883 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6884 NEONMAP0(vceqz_v),
6885 NEONMAP0(vceqzq_v),
6886 NEONMAP0(vcgez_v),
6887 NEONMAP0(vcgezq_v),
6888 NEONMAP0(vcgtz_v),
6889 NEONMAP0(vcgtzq_v),
6890 NEONMAP0(vclez_v),
6891 NEONMAP0(vclezq_v),
6892 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6893 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6894 NEONMAP0(vcltz_v),
6895 NEONMAP0(vcltzq_v),
6896 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6897 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6898 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6899 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6900 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6901 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6902 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6903 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6904 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6905 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6906 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6907 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6908 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6909 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6910 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6911 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6912 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6913 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6914 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6915 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6916 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6917 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6918 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6919 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6920 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6921 NEONMAP0(vcvt_f16_s16),
6922 NEONMAP0(vcvt_f16_u16),
6923 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6924 NEONMAP0(vcvt_f32_v),
6925 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6926 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6927 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6928 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6929 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6930 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6931 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6932 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6933 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6934 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6935 NEONMAP0(vcvtq_f16_s16),
6936 NEONMAP0(vcvtq_f16_u16),
6937 NEONMAP0(vcvtq_f32_v),
6938 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6939 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6940 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6941 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6942 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6943 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6944 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6945 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6946 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6947 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6948 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6949 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6950 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6951 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6952 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6953 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6954 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6955 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6956 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6957 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6958 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6959 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6960 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6961 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6962 NEONMAP0(vext_v),
6963 NEONMAP0(vextq_v),
6964 NEONMAP0(vfma_v),
6965 NEONMAP0(vfmaq_v),
6966 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6967 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6968 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6969 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6970 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6971 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6972 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6973 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6974 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6975 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6976 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6977 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6978 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6979 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6980 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6981 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6982 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6983 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6984 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6985 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6986 NEONMAP0(vmovl_v),
6987 NEONMAP0(vmovn_v),
6988 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6989 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6990 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6991 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6992 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6993 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6994 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6995 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6996 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6997 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6998 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6999 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7000 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7001 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7002 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7003 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7004 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7005 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7006 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7007 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7008 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7009 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7010 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7011 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7012 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7013 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7014 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7015 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7016 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7017 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7018 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7019 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7020 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7021 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7022 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7023 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7024 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7025 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7026 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7027 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7028 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7029 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
7030 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7031 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7032 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7033 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7034 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7035 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7036 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7037 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7038 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7039 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7040 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7041 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7042 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7043 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7044 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7045 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7046 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7047 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7048 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7049 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7050 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7051 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7052 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7053 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7054 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7055 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7056 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7057 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7058 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7059 NEONMAP0(vrndi_v),
7060 NEONMAP0(vrndiq_v),
7061 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7062 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7063 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7064 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7065 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7066 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7067 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7068 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7069 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7070 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7071 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7072 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7073 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7074 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7075 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7076 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7077 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7078 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7079 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7080 NEONMAP0(vshl_n_v),
7081 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7082 NEONMAP0(vshll_n_v),
7083 NEONMAP0(vshlq_n_v),
7084 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7085 NEONMAP0(vshr_n_v),
7086 NEONMAP0(vshrn_n_v),
7087 NEONMAP0(vshrq_n_v),
7088 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7089 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7090 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7091 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7092 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7093 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7094 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7095 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7096 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7097 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7098 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7099 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7100 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7101 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7102 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7103 NEONMAP0(vsubhn_v),
7104 NEONMAP0(vtst_v),
7105 NEONMAP0(vtstq_v),
7106 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7107 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7108 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7109 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7110};
7111
7112static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7113 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7114 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7115 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7116 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7117 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7118 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7119 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7120 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7121 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7122 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7123 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7124 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7125 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7126 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7127 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7128 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7129 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7130 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7131 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7132 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7133 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7134 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7135 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7136 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7137 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7138 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7139 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7140 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7141 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7142 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7143 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7144 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7145 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7146 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7147 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7148 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7149 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7150 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7151 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7152 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7153 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7154 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7155 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7156 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7157 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7158 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7159 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7160 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7161 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7162 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7163 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7164 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7165 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7166 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7167 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7168 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7169 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7170 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7171 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7172 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7173 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7174 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7175 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7176 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7177 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7178 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7179 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7180 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7181 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7182 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7183 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7184 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7185 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7186 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7187 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7188 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7189 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7190 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7191 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7192 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7193 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7194 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7195 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7196 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7197 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7198 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7199 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7200 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7201 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7202 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7203 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7204 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7205 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7206 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7207 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7208 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7209 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7210 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7211 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7212 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7213 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7214 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7215 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7216 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7217 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7218 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7219 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7220 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7221 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7222 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7223 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7224 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7225 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7226 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7227 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7228 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7229 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7230 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7231 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7232 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7233 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7234 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7235 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7236 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7237 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7238 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7239 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7240 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7241 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7242 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7243 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7244 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7245 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7246 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7247 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7248 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7249 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7250 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7251 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7252 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7253 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7254 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7255 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7256 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7257 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7258 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7259 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7260 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7261 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7262 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7263 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7264 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7265 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7266 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7267 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7268 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7269 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7270 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7271 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7272 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7273 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7274 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7275 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7276 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7277 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7278 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7279 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7280 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7281 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7282 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7283 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7284 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7285 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7286 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7287 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7288 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7289 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7290 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7291 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7292 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7293 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7294 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7295 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7296 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7297 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7298 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7299 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7300 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7301 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7302 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7303 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7304 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7305 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7306 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7307 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7308 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7309 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7310 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7311 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7312 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7313 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7314 // FP16 scalar intrinsics go here.
7315 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7316 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7317 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7318 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7319 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7320 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7321 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7322 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7323 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7324 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7325 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7326 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7327 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7328 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7329 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7330 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7331 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7332 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7333 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7334 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7335 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7336 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7337 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7338 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7339 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7340 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7341 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7342 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7343 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7344 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7345 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7346 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7347 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7348 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7349};
7350
7351// Some intrinsics are equivalent for codegen.
7352static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7353 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7354 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7355 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7356 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7357 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7358 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7359 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7360 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7361 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7362 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7363 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7364 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7365 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7366 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7367 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7368 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7369 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7370 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7371 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7372 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7373 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7374 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7375 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7376 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7377 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7378 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7379 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7380 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7381 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7382 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7383 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7384 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7385 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7386 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7387 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7388 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7389 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7390 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7391 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7392 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7393 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7394 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7395 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7396 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7397 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7398 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7399 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7400 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7401 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7402 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7403 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7404 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7405 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7406 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7407 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7408 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7409 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7410 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7411 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7412 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7413 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7414 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7415 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7416 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7417 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7418 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7419 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7420 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7421 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7422 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7423 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7424 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7425 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7426 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7427 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7428 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7429 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7430 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7431 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7432 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7433 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7434 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7435 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7436 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7437 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7438 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7439 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7440 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7441 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7442 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7443 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7444 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7445 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7446 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7447 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7448 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7449 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7450 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7451 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7452 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7453 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7454 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7455 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7456 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7457 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7458 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7459 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7460 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7461 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7462 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7463 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7464 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7465 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7466 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7467 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7468 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7469 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7470 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7471 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7472 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7473 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7474 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7475 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7476 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7477 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7478 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7479 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7480 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7481 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7482 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7483 // arbitrary one to be handled as the canonical variation.
7484 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7485 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7486 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7487 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7488 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7489 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7490 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7491 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7492 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7493 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7494 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7495 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7496};
7497
7498#undef NEONMAP0
7499#undef NEONMAP1
7500#undef NEONMAP2
7501
7502#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7503 { \
7504 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7505 TypeModifier \
7506 }
7507
7508#define SVEMAP2(NameBase, TypeModifier) \
7509 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
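// Illustrative expansion with a hypothetical name (not an actual table entry):
//   SVEMAP2(svfoo_m, SomeTypeModifier)
// pastes tokens into
//   { "svfoo_m", SVE::BI__builtin_sve_svfoo_m, 0, 0, SomeTypeModifier }
// i.e. the name string, the clang builtin ID, a zero LLVM intrinsic ID, and
// the type-modifier flags; SVEMAP1 additionally names the LLVM intrinsic.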
7510static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7511#define GET_SVE_LLVM_INTRINSIC_MAP
7512#include "clang/Basic/arm_sve_builtin_cg.inc"
7513#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7514#undef GET_SVE_LLVM_INTRINSIC_MAP
7515};
7516
7517#undef SVEMAP1
7518#undef SVEMAP2
7519
7520#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7521 { \
7522 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7523 TypeModifier \
7524 }
7525
7526#define SMEMAP2(NameBase, TypeModifier) \
7527 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7528static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7529#define GET_SME_LLVM_INTRINSIC_MAP
7530#include "clang/Basic/arm_sme_builtin_cg.inc"
7531#undef GET_SME_LLVM_INTRINSIC_MAP
7532};
7533
7534#undef SMEMAP1
7535#undef SMEMAP2
7536
7537 static bool NEONSIMDIntrinsicsProvenSorted = false;
7538
7539 static bool AArch64SIMDIntrinsicsProvenSorted = false;
7540 static bool AArch64SISDIntrinsicsProvenSorted = false;
7541 static bool AArch64SVEIntrinsicsProvenSorted = false;
7542 static bool AArch64SMEIntrinsicsProvenSorted = false;
7543
7544static const ARMVectorIntrinsicInfo *
7545 findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7546 unsigned BuiltinID, bool &MapProvenSorted) {
7547
7548#ifndef NDEBUG
7549 if (!MapProvenSorted) {
7550 assert(llvm::is_sorted(IntrinsicMap));
7551 MapProvenSorted = true;
7552 }
7553#endif
7554
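// The intrinsic tables are required to be sorted by builtin ID (asserted once
// per table above in asserts-enabled builds), so llvm::lower_bound can
// binary-search for the requested BuiltinID.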
7555 const ARMVectorIntrinsicInfo *Builtin =
7556 llvm::lower_bound(IntrinsicMap, BuiltinID);
7557
7558 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7559 return Builtin;
7560
7561 return nullptr;
7562}
7563
7564 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7565 unsigned Modifier,
7566 llvm::Type *ArgType,
7567 const CallExpr *E) {
7568 int VectorSize = 0;
7569 if (Modifier & Use64BitVectors)
7570 VectorSize = 64;
7571 else if (Modifier & Use128BitVectors)
7572 VectorSize = 128;
7573
7574 // Return type.
7575 SmallVector<llvm::Type *, 3> Tys;
7576 if (Modifier & AddRetType) {
7577 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7578 if (Modifier & VectorizeRetType)
7579 Ty = llvm::FixedVectorType::get(
7580 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7581
7582 Tys.push_back(Ty);
7583 }
7584
7585 // Arguments.
7586 if (Modifier & VectorizeArgTypes) {
7587 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7588 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7589 }
7590
7591 if (Modifier & (Add1ArgType | Add2ArgTypes))
7592 Tys.push_back(ArgType);
7593
7594 if (Modifier & Add2ArgTypes)
7595 Tys.push_back(ArgType);
7596
7597 if (Modifier & InventFloatType)
7598 Tys.push_back(FloatTy);
7599
7600 return CGM.getIntrinsic(IntrinsicID, Tys);
7601}
7602
7603 static Value *EmitCommonNeonSISDBuiltinExpr(
7604 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7605 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7606 unsigned BuiltinID = SISDInfo.BuiltinID;
7607 unsigned int Int = SISDInfo.LLVMIntrinsic;
7608 unsigned Modifier = SISDInfo.TypeModifier;
7609 const char *s = SISDInfo.NameHint;
7610
7611 switch (BuiltinID) {
7612 case NEON::BI__builtin_neon_vcled_s64:
7613 case NEON::BI__builtin_neon_vcled_u64:
7614 case NEON::BI__builtin_neon_vcles_f32:
7615 case NEON::BI__builtin_neon_vcled_f64:
7616 case NEON::BI__builtin_neon_vcltd_s64:
7617 case NEON::BI__builtin_neon_vcltd_u64:
7618 case NEON::BI__builtin_neon_vclts_f32:
7619 case NEON::BI__builtin_neon_vcltd_f64:
7620 case NEON::BI__builtin_neon_vcales_f32:
7621 case NEON::BI__builtin_neon_vcaled_f64:
7622 case NEON::BI__builtin_neon_vcalts_f32:
7623 case NEON::BI__builtin_neon_vcaltd_f64:
7624 // Only one direction of these comparisons actually exists: cmle is really a
7625 // cmge with swapped operands. The table gives us the right intrinsic, but we
7626 // still need to do the swap.
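// For example, vcaled_f64(a, b), i.e. |a| <= |b|, maps to facge in the table
// and is emitted here as facge(b, a).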
7627 std::swap(Ops[0], Ops[1]);
7628 break;
7629 }
7630
7631 assert(Int && "Generic code assumes a valid intrinsic");
7632
7633 // Determine the type(s) of this overloaded AArch64 intrinsic.
7634 const Expr *Arg = E->getArg(0);
7635 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7636 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7637
7638 int j = 0;
7639 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7640 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7641 ai != ae; ++ai, ++j) {
7642 llvm::Type *ArgTy = ai->getType();
7643 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7644 ArgTy->getPrimitiveSizeInBits())
7645 continue;
7646
7647 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7648 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7649 // it before inserting.
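// For instance, a 32-bit immediate feeding a 16-bit-element overload is
// truncated to i16 and then inserted into lane 0 of a poison vector of ArgTy,
// which is what the two statements below do.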
7650 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7651 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7652 Ops[j] =
7653 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7654 }
7655
7656 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7657 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7658 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7659 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7660 return CGF.Builder.CreateExtractElement(Result, C0);
7661
7662 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7663}
7664
7665 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7666 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7667 const char *NameHint, unsigned Modifier, const CallExpr *E,
7668 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7669 llvm::Triple::ArchType Arch) {
7670 // Get the last argument, which specifies the vector type.
7671 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7672 std::optional<llvm::APSInt> NeonTypeConst =
7673 Arg->getIntegerConstantExpr(getContext());
7674 if (!NeonTypeConst)
7675 return nullptr;
7676
7677 // Determine the type of this overloaded NEON intrinsic.
7678 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7679 bool Usgn = Type.isUnsigned();
7680 bool Quad = Type.isQuad();
7681 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7682 const bool AllowBFloatArgsAndRet =
7683 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7684
7685 llvm::FixedVectorType *VTy =
7686 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7687 llvm::Type *Ty = VTy;
7688 if (!Ty)
7689 return nullptr;
7690
7691 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7692 return Builder.getInt32(addr.getAlignment().getQuantity());
7693 };
7694
7695 unsigned Int = LLVMIntrinsic;
7696 if ((Modifier & UnsignedAlts) && !Usgn)
7697 Int = AltLLVMIntrinsic;
7698
7699 switch (BuiltinID) {
7700 default: break;
7701 case NEON::BI__builtin_neon_splat_lane_v:
7702 case NEON::BI__builtin_neon_splat_laneq_v:
7703 case NEON::BI__builtin_neon_splatq_lane_v:
7704 case NEON::BI__builtin_neon_splatq_laneq_v: {
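// NumElements starts as the element count of the source vector type; the
// "q"-result variant splats into a vector with twice as many elements, and
// the "laneq"-source variant into one with half as many.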
7705 auto NumElements = VTy->getElementCount();
7706 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7707 NumElements = NumElements * 2;
7708 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7709 NumElements = NumElements.divideCoefficientBy(2);
7710
7711 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7712 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7713 }
7714 case NEON::BI__builtin_neon_vpadd_v:
7715 case NEON::BI__builtin_neon_vpaddq_v:
7716 // We don't allow fp/int overloading of intrinsics.
7717 if (VTy->getElementType()->isFloatingPointTy() &&
7718 Int == Intrinsic::aarch64_neon_addp)
7719 Int = Intrinsic::aarch64_neon_faddp;
7720 break;
7721 case NEON::BI__builtin_neon_vabs_v:
7722 case NEON::BI__builtin_neon_vabsq_v:
7723 if (VTy->getElementType()->isFloatingPointTy())
7724 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7725 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7726 case NEON::BI__builtin_neon_vadd_v:
7727 case NEON::BI__builtin_neon_vaddq_v: {
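// These builtins correspond to the polynomial vadd variants; polynomial
// (carry-less) addition over GF(2) is a bitwise XOR of the operands, performed
// below on i8 vectors of the appropriate total width.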
7728 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7729 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7730 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7731 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7732 return Builder.CreateBitCast(Ops[0], Ty);
7733 }
7734 case NEON::BI__builtin_neon_vaddhn_v: {
7735 llvm::FixedVectorType *SrcTy =
7736 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7737
7738 // %sum = add <4 x i32> %lhs, %rhs
7739 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7740 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7741 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7742
7743 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7744 Constant *ShiftAmt =
7745 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7746 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7747
7748 // %res = trunc <4 x i32> %high to <4 x i16>
7749 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7750 }
7751 case NEON::BI__builtin_neon_vcale_v:
7752 case NEON::BI__builtin_neon_vcaleq_v:
7753 case NEON::BI__builtin_neon_vcalt_v:
7754 case NEON::BI__builtin_neon_vcaltq_v:
7755 std::swap(Ops[0], Ops[1]);
7756 [[fallthrough]];
7757 case NEON::BI__builtin_neon_vcage_v:
7758 case NEON::BI__builtin_neon_vcageq_v:
7759 case NEON::BI__builtin_neon_vcagt_v:
7760 case NEON::BI__builtin_neon_vcagtq_v: {
7761 llvm::Type *Ty;
7762 switch (VTy->getScalarSizeInBits()) {
7763 default: llvm_unreachable("unexpected type");
7764 case 32:
7765 Ty = FloatTy;
7766 break;
7767 case 64:
7768 Ty = DoubleTy;
7769 break;
7770 case 16:
7771 Ty = HalfTy;
7772 break;
7773 }
7774 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7775 llvm::Type *Tys[] = { VTy, VecFlt };
7776 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7777 return EmitNeonCall(F, Ops, NameHint);
7778 }
7779 case NEON::BI__builtin_neon_vceqz_v:
7780 case NEON::BI__builtin_neon_vceqzq_v:
7781 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7782 ICmpInst::ICMP_EQ, "vceqz");
7783 case NEON::BI__builtin_neon_vcgez_v:
7784 case NEON::BI__builtin_neon_vcgezq_v:
7785 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7786 ICmpInst::ICMP_SGE, "vcgez");
7787 case NEON::BI__builtin_neon_vclez_v:
7788 case NEON::BI__builtin_neon_vclezq_v:
7789 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7790 ICmpInst::ICMP_SLE, "vclez");
7791 case NEON::BI__builtin_neon_vcgtz_v:
7792 case NEON::BI__builtin_neon_vcgtzq_v:
7793 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7794 ICmpInst::ICMP_SGT, "vcgtz");
7795 case NEON::BI__builtin_neon_vcltz_v:
7796 case NEON::BI__builtin_neon_vcltzq_v:
7797 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7798 ICmpInst::ICMP_SLT, "vcltz");
7799 case NEON::BI__builtin_neon_vclz_v:
7800 case NEON::BI__builtin_neon_vclzq_v:
7801 // We generate a target-independent intrinsic, which needs a second argument
7802 // indicating whether clz of zero is undefined; on ARM it isn't.
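// The emitted call ends up looking roughly like
//   call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// with the i1 flag taken from the target hook below.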
7803 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7804 break;
7805 case NEON::BI__builtin_neon_vcvt_f32_v:
7806 case NEON::BI__builtin_neon_vcvtq_f32_v:
7807 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7808 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7809 HasLegalHalfType);
7810 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7811 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7812 case NEON::BI__builtin_neon_vcvt_f16_s16:
7813 case NEON::BI__builtin_neon_vcvt_f16_u16:
7814 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7815 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7816 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7817 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7818 HasLegalHalfType);
7819 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7820 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7821 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7822 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7823 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7824 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7825 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7826 Function *F = CGM.getIntrinsic(Int, Tys);
7827 return EmitNeonCall(F, Ops, "vcvt_n");
7828 }
7829 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7830 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7831 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7832 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7833 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7834 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7835 Function *F = CGM.getIntrinsic(Int, Tys);
7836 return EmitNeonCall(F, Ops, "vcvt_n");
7837 }
7838 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7839 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7840 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7841 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7842 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7843 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7844 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7845 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7846 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7847 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7848 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7849 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7850 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7851 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7852 return EmitNeonCall(F, Ops, "vcvt_n");
7853 }
7854 case NEON::BI__builtin_neon_vcvt_s32_v:
7855 case NEON::BI__builtin_neon_vcvt_u32_v:
7856 case NEON::BI__builtin_neon_vcvt_s64_v:
7857 case NEON::BI__builtin_neon_vcvt_u64_v:
7858 case NEON::BI__builtin_neon_vcvt_s16_f16:
7859 case NEON::BI__builtin_neon_vcvt_u16_f16:
7860 case NEON::BI__builtin_neon_vcvtq_s32_v:
7861 case NEON::BI__builtin_neon_vcvtq_u32_v:
7862 case NEON::BI__builtin_neon_vcvtq_s64_v:
7863 case NEON::BI__builtin_neon_vcvtq_u64_v:
7864 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7865 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7866 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7867 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7868 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7869 }
7870 case NEON::BI__builtin_neon_vcvta_s16_f16:
7871 case NEON::BI__builtin_neon_vcvta_s32_v:
7872 case NEON::BI__builtin_neon_vcvta_s64_v:
7873 case NEON::BI__builtin_neon_vcvta_u16_f16:
7874 case NEON::BI__builtin_neon_vcvta_u32_v:
7875 case NEON::BI__builtin_neon_vcvta_u64_v:
7876 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7877 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7878 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7879 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7880 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7881 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7882 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7883 case NEON::BI__builtin_neon_vcvtn_s32_v:
7884 case NEON::BI__builtin_neon_vcvtn_s64_v:
7885 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7886 case NEON::BI__builtin_neon_vcvtn_u32_v:
7887 case NEON::BI__builtin_neon_vcvtn_u64_v:
7888 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7889 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7890 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7891 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7892 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7893 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7894 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7895 case NEON::BI__builtin_neon_vcvtp_s32_v:
7896 case NEON::BI__builtin_neon_vcvtp_s64_v:
7897 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7898 case NEON::BI__builtin_neon_vcvtp_u32_v:
7899 case NEON::BI__builtin_neon_vcvtp_u64_v:
7900 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7901 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7902 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7903 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7904 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7905 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7906 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7907 case NEON::BI__builtin_neon_vcvtm_s32_v:
7908 case NEON::BI__builtin_neon_vcvtm_s64_v:
7909 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7910 case NEON::BI__builtin_neon_vcvtm_u32_v:
7911 case NEON::BI__builtin_neon_vcvtm_u64_v:
7912 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7913 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7914 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7915 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7916 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7917 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7918 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7919 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7920 }
7921 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7922 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7923 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7924
7925 }
7926 case NEON::BI__builtin_neon_vext_v:
7927 case NEON::BI__builtin_neon_vextq_v: {
7928 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7929 SmallVector<int, 16> Indices;
7930 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7931 Indices.push_back(i+CV);
7932
7933 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7934 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7935 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7936 }
7937 case NEON::BI__builtin_neon_vfma_v:
7938 case NEON::BI__builtin_neon_vfmaq_v: {
7939 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7940 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7941 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7942
7943 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
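// vfma(a, b, c) computes a + b * c, while llvm.fma(x, y, z) computes
// x * y + z, so the accumulator Ops[0] is rotated to the last position.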
7944 return emitCallMaybeConstrainedFPBuiltin(
7945 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7946 {Ops[1], Ops[2], Ops[0]});
7947 }
7948 case NEON::BI__builtin_neon_vld1_v:
7949 case NEON::BI__builtin_neon_vld1q_v: {
7950 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7951 Ops.push_back(getAlignmentValue32(PtrOp0));
7952 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7953 }
7954 case NEON::BI__builtin_neon_vld1_x2_v:
7955 case NEON::BI__builtin_neon_vld1q_x2_v:
7956 case NEON::BI__builtin_neon_vld1_x3_v:
7957 case NEON::BI__builtin_neon_vld1q_x3_v:
7958 case NEON::BI__builtin_neon_vld1_x4_v:
7959 case NEON::BI__builtin_neon_vld1q_x4_v: {
7960 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7961 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7962 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7963 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7964 }
7965 case NEON::BI__builtin_neon_vld2_v:
7966 case NEON::BI__builtin_neon_vld2q_v:
7967 case NEON::BI__builtin_neon_vld3_v:
7968 case NEON::BI__builtin_neon_vld3q_v:
7969 case NEON::BI__builtin_neon_vld4_v:
7970 case NEON::BI__builtin_neon_vld4q_v:
7971 case NEON::BI__builtin_neon_vld2_dup_v:
7972 case NEON::BI__builtin_neon_vld2q_dup_v:
7973 case NEON::BI__builtin_neon_vld3_dup_v:
7974 case NEON::BI__builtin_neon_vld3q_dup_v:
7975 case NEON::BI__builtin_neon_vld4_dup_v:
7976 case NEON::BI__builtin_neon_vld4q_dup_v: {
7977 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7978 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7979 Value *Align = getAlignmentValue32(PtrOp1);
7980 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7981 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7982 }
7983 case NEON::BI__builtin_neon_vld1_dup_v:
7984 case NEON::BI__builtin_neon_vld1q_dup_v: {
7985 Value *V = PoisonValue::get(Ty);
7986 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7987 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7988 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7989 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7990 return EmitNeonSplat(Ops[0], CI);
7991 }
7992 case NEON::BI__builtin_neon_vld2_lane_v:
7993 case NEON::BI__builtin_neon_vld2q_lane_v:
7994 case NEON::BI__builtin_neon_vld3_lane_v:
7995 case NEON::BI__builtin_neon_vld3q_lane_v:
7996 case NEON::BI__builtin_neon_vld4_lane_v:
7997 case NEON::BI__builtin_neon_vld4q_lane_v: {
7998 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7999 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8000 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8001 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8002 Ops.push_back(getAlignmentValue32(PtrOp1));
8003 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8004 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8005 }
8006 case NEON::BI__builtin_neon_vmovl_v: {
8007 llvm::FixedVectorType *DTy =
8008 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8009 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8010 if (Usgn)
8011 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8012 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8013 }
8014 case NEON::BI__builtin_neon_vmovn_v: {
8015 llvm::FixedVectorType *QTy =
8016 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8017 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8018 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8019 }
8020 case NEON::BI__builtin_neon_vmull_v:
8021 // FIXME: the integer vmull operations could be emitted in terms of pure
8022 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8023 // hoisting the exts outside loops. Until GlobalISel comes along and can see
8024 // through such movement, this leads to bad CodeGen. So we need an
8025 // intrinsic for now.
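// The pure-IR form alluded to above would look roughly like (signed case):
//   %l = sext <4 x i16> %a to <4 x i32>
//   %r = sext <4 x i16> %b to <4 x i32>
//   %p = mul <4 x i32> %l, %r
// with zext instead of sext for the unsigned variants.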
8026 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8027 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8028 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8029 case NEON::BI__builtin_neon_vpadal_v:
8030 case NEON::BI__builtin_neon_vpadalq_v: {
8031 // The source operand type has twice as many elements of half the size.
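// e.g. a <4 x i32> accumulator/result is paired with an <8 x i16> source.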
8032 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8033 llvm::Type *EltTy =
8034 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8035 auto *NarrowTy =
8036 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8037 llvm::Type *Tys[2] = { Ty, NarrowTy };
8038 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8039 }
8040 case NEON::BI__builtin_neon_vpaddl_v:
8041 case NEON::BI__builtin_neon_vpaddlq_v: {
8042 // The source operand type has twice as many elements of half the size.
8043 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8044 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8045 auto *NarrowTy =
8046 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8047 llvm::Type *Tys[2] = { Ty, NarrowTy };
8048 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8049 }
8050 case NEON::BI__builtin_neon_vqdmlal_v:
8051 case NEON::BI__builtin_neon_vqdmlsl_v: {
8052 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8053 Ops[1] =
8054 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8055 Ops.resize(2);
8056 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8057 }
8058 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8059 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8060 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8061 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8062 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8063 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8064 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8065 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8066 RTy->getNumElements() * 2);
8067 llvm::Type *Tys[2] = {
8068 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8069 /*isQuad*/ false))};
8070 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8071 }
8072 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8073 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8074 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8075 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8076 llvm::Type *Tys[2] = {
8077 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8078 /*isQuad*/ true))};
8079 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8080 }
8081 case NEON::BI__builtin_neon_vqshl_n_v:
8082 case NEON::BI__builtin_neon_vqshlq_n_v:
8083 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8084 1, false);
8085 case NEON::BI__builtin_neon_vqshlu_n_v:
8086 case NEON::BI__builtin_neon_vqshluq_n_v:
8087 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8088 1, false);
8089 case NEON::BI__builtin_neon_vrecpe_v:
8090 case NEON::BI__builtin_neon_vrecpeq_v:
8091 case NEON::BI__builtin_neon_vrsqrte_v:
8092 case NEON::BI__builtin_neon_vrsqrteq_v:
8093 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8094 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8095 case NEON::BI__builtin_neon_vrndi_v:
8096 case NEON::BI__builtin_neon_vrndiq_v:
8097 Int = Builder.getIsFPConstrained()
8098 ? Intrinsic::experimental_constrained_nearbyint
8099 : Intrinsic::nearbyint;
8100 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8101 case NEON::BI__builtin_neon_vrshr_n_v:
8102 case NEON::BI__builtin_neon_vrshrq_n_v:
8103 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8104 1, true);
8105 case NEON::BI__builtin_neon_vsha512hq_u64:
8106 case NEON::BI__builtin_neon_vsha512h2q_u64:
8107 case NEON::BI__builtin_neon_vsha512su0q_u64:
8108 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8109 Function *F = CGM.getIntrinsic(Int);
8110 return EmitNeonCall(F, Ops, "");
8111 }
8112 case NEON::BI__builtin_neon_vshl_n_v:
8113 case NEON::BI__builtin_neon_vshlq_n_v:
8114 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8115 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8116 "vshl_n");
8117 case NEON::BI__builtin_neon_vshll_n_v: {
8118 llvm::FixedVectorType *SrcTy =
8119 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8120 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8121 if (Usgn)
8122 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8123 else
8124 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8125 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8126 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8127 }
8128 case NEON::BI__builtin_neon_vshrn_n_v: {
8129 llvm::FixedVectorType *SrcTy =
8130 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8131 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8132 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8133 if (Usgn)
8134 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8135 else
8136 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8137 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8138 }
8139 case NEON::BI__builtin_neon_vshr_n_v:
8140 case NEON::BI__builtin_neon_vshrq_n_v:
8141 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8142 case NEON::BI__builtin_neon_vst1_v:
8143 case NEON::BI__builtin_neon_vst1q_v:
8144 case NEON::BI__builtin_neon_vst2_v:
8145 case NEON::BI__builtin_neon_vst2q_v:
8146 case NEON::BI__builtin_neon_vst3_v:
8147 case NEON::BI__builtin_neon_vst3q_v:
8148 case NEON::BI__builtin_neon_vst4_v:
8149 case NEON::BI__builtin_neon_vst4q_v:
8150 case NEON::BI__builtin_neon_vst2_lane_v:
8151 case NEON::BI__builtin_neon_vst2q_lane_v:
8152 case NEON::BI__builtin_neon_vst3_lane_v:
8153 case NEON::BI__builtin_neon_vst3q_lane_v:
8154 case NEON::BI__builtin_neon_vst4_lane_v:
8155 case NEON::BI__builtin_neon_vst4q_lane_v: {
8156 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8157 Ops.push_back(getAlignmentValue32(PtrOp0));
8158 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8159 }
8160 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8161 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8162 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8163 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8164 case NEON::BI__builtin_neon_vsm4eq_u32: {
8165 Function *F = CGM.getIntrinsic(Int);
8166 return EmitNeonCall(F, Ops, "");
8167 }
8168 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8169 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8170 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8171 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8172 Function *F = CGM.getIntrinsic(Int);
8173 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8174 return EmitNeonCall(F, Ops, "");
8175 }
8176 case NEON::BI__builtin_neon_vst1_x2_v:
8177 case NEON::BI__builtin_neon_vst1q_x2_v:
8178 case NEON::BI__builtin_neon_vst1_x3_v:
8179 case NEON::BI__builtin_neon_vst1q_x3_v:
8180 case NEON::BI__builtin_neon_vst1_x4_v:
8181 case NEON::BI__builtin_neon_vst1q_x4_v: {
8182 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8183 // in AArch64 it comes last. We may want to standardize on one or the other.
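// The builtin always supplies the pointer first; for AArch64 targets the
// rotate below moves it to the end to match the intrinsic's operand order.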
8184 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8185 Arch == llvm::Triple::aarch64_32) {
8186 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8187 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8188 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8189 }
8190 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8191 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8192 }
8193 case NEON::BI__builtin_neon_vsubhn_v: {
8194 llvm::FixedVectorType *SrcTy =
8195 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8196
8197    // %diff = sub <4 x i32> %lhs, %rhs
8198 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8199 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8200 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8201
8202    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8203 Constant *ShiftAmt =
8204 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8205 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8206
8207 // %res = trunc <4 x i32> %high to <4 x i16>
8208 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8209 }
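// Illustrative example (not part of the upstream file): assuming <arm_neon.h>,
// a source-level call such as
//   int16x4_t r = vsubhn_s32(a, b);
// is lowered by the case above to IR of the shape
//   %diff = sub <4 x i32> %a, %b
//   %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
//   %r    = trunc <4 x i32> %high to <4 x i16>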
8210 case NEON::BI__builtin_neon_vtrn_v:
8211 case NEON::BI__builtin_neon_vtrnq_v: {
8212 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8213 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8214 Value *SV = nullptr;
8215
8216 for (unsigned vi = 0; vi != 2; ++vi) {
8217 SmallVector<int, 16> Indices;
8218 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8219 Indices.push_back(i+vi);
8220 Indices.push_back(i+e+vi);
8221 }
8222 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8223 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8224 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8225 }
8226 return SV;
8227 }
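// Illustrative example (not part of the upstream file): assuming <arm_neon.h>,
//   int16x4x2_t r = vtrn_s16(a, b);
// makes the loop above store two shuffles through the sret pointer in Ops[0]:
//   r.val[0] = shufflevector %a, %b, <0, 4, 2, 6>   ; {a0, b0, a2, b2}
//   r.val[1] = shufflevector %a, %b, <1, 5, 3, 7>   ; {a1, b1, a3, b3}
// The vuzp and vzip cases below follow the same store-two-shuffles pattern,
// differing only in the index vectors.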
8228 case NEON::BI__builtin_neon_vtst_v:
8229 case NEON::BI__builtin_neon_vtstq_v: {
8230 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8231 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8232 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8233 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8234 ConstantAggregateZero::get(Ty));
8235 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8236 }
8237 case NEON::BI__builtin_neon_vuzp_v:
8238 case NEON::BI__builtin_neon_vuzpq_v: {
8239 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8240 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8241 Value *SV = nullptr;
8242
8243 for (unsigned vi = 0; vi != 2; ++vi) {
8244 SmallVector<int, 16> Indices;
8245 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8246 Indices.push_back(2*i+vi);
8247
8248 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8249 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8250 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8251 }
8252 return SV;
8253 }
8254 case NEON::BI__builtin_neon_vxarq_u64: {
8255 Function *F = CGM.getIntrinsic(Int);
8256 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8257 return EmitNeonCall(F, Ops, "");
8258 }
8259 case NEON::BI__builtin_neon_vzip_v:
8260 case NEON::BI__builtin_neon_vzipq_v: {
8261 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8262 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8263 Value *SV = nullptr;
8264
8265 for (unsigned vi = 0; vi != 2; ++vi) {
8266 SmallVector<int, 16> Indices;
8267 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8268 Indices.push_back((i + vi*e) >> 1);
8269 Indices.push_back(((i + vi*e) >> 1)+e);
8270 }
8271 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8272 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8273 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8274 }
8275 return SV;
8276 }
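// Illustrative example (not part of the upstream file): assuming <arm_neon.h>,
//   int8x8x2_t r = vzip_s8(a, b);
// produces the interleaving index vectors computed above:
//   r.val[0] = shufflevector %a, %b, <0, 8, 1, 9, 2, 10, 3, 11>
//   r.val[1] = shufflevector %a, %b, <4, 12, 5, 13, 6, 14, 7, 15>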
8277 case NEON::BI__builtin_neon_vdot_s32:
8278 case NEON::BI__builtin_neon_vdot_u32:
8279 case NEON::BI__builtin_neon_vdotq_s32:
8280 case NEON::BI__builtin_neon_vdotq_u32: {
8281 auto *InputTy =
8282 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8283 llvm::Type *Tys[2] = { Ty, InputTy };
8284 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8285 }
8286 case NEON::BI__builtin_neon_vfmlal_low_f16:
8287 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8288 auto *InputTy =
8289 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8290 llvm::Type *Tys[2] = { Ty, InputTy };
8291 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8292 }
8293 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8294 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8295 auto *InputTy =
8296 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8297 llvm::Type *Tys[2] = { Ty, InputTy };
8298 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8299 }
8300 case NEON::BI__builtin_neon_vfmlal_high_f16:
8301 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8302 auto *InputTy =
8303 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8304 llvm::Type *Tys[2] = { Ty, InputTy };
8305 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8306 }
8307 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8308 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8309 auto *InputTy =
8310 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8311 llvm::Type *Tys[2] = { Ty, InputTy };
8312 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8313 }
8314 case NEON::BI__builtin_neon_vmmlaq_s32:
8315 case NEON::BI__builtin_neon_vmmlaq_u32: {
8316 auto *InputTy =
8317 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8318 llvm::Type *Tys[2] = { Ty, InputTy };
8319 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8320 }
8321 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8322 auto *InputTy =
8323 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8324 llvm::Type *Tys[2] = { Ty, InputTy };
8325 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8326 }
8327 case NEON::BI__builtin_neon_vusdot_s32:
8328 case NEON::BI__builtin_neon_vusdotq_s32: {
8329 auto *InputTy =
8330 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8331 llvm::Type *Tys[2] = { Ty, InputTy };
8332 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8333 }
8334 case NEON::BI__builtin_neon_vbfdot_f32:
8335 case NEON::BI__builtin_neon_vbfdotq_f32: {
8336 llvm::Type *InputTy =
8337 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8338 llvm::Type *Tys[2] = { Ty, InputTy };
8339 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8340 }
8341 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8342 llvm::Type *Tys[1] = { Ty };
8343 Function *F = CGM.getIntrinsic(Int, Tys);
8344 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8345 }
8346
8347 }
8348
8349 assert(Int && "Expected valid intrinsic number");
8350
8351 // Determine the type(s) of this overloaded AArch64 intrinsic.
8352 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8353
8354 Value *Result = EmitNeonCall(F, Ops, NameHint);
8355 llvm::Type *ResultType = ConvertType(E->getType());
8356  // Cast the one-element vector result of an AArch64 intrinsic back to the
8357  // scalar type expected by the builtin.
8358 return Builder.CreateBitCast(Result, ResultType, NameHint);
8359}
8360
8361Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8362    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8363 const CmpInst::Predicate Ip, const Twine &Name) {
8364 llvm::Type *OTy = Op->getType();
8365
8366 // FIXME: this is utterly horrific. We should not be looking at previous
8367 // codegen context to find out what needs doing. Unfortunately TableGen
8368 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8369 // (etc).
8370 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8371 OTy = BI->getOperand(0)->getType();
8372
8373 Op = Builder.CreateBitCast(Op, OTy);
8374 if (OTy->getScalarType()->isFloatingPointTy()) {
8375 if (Fp == CmpInst::FCMP_OEQ)
8376 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8377 else
8378 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8379 } else {
8380 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8381 }
8382 return Builder.CreateSExt(Op, Ty, Name);
8383}
8384
8385static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8386                                 Value *ExtOp, Value *IndexOp,
8387 llvm::Type *ResTy, unsigned IntID,
8388 const char *Name) {
8389  SmallVector<Value *, 2> TblOps;
8390  if (ExtOp)
8391 TblOps.push_back(ExtOp);
8392
8393  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8394 SmallVector<int, 16> Indices;
8395 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8396 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8397 Indices.push_back(2*i);
8398 Indices.push_back(2*i+1);
8399 }
8400
8401 int PairPos = 0, End = Ops.size() - 1;
8402 while (PairPos < End) {
8403 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8404 Ops[PairPos+1], Indices,
8405 Name));
8406 PairPos += 2;
8407 }
8408
8409  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8410  // of the last 128-bit lookup table with zero.
8411 if (PairPos == End) {
8412 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8413 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8414 ZeroTbl, Indices, Name));
8415 }
8416
8417 Function *TblF;
8418 TblOps.push_back(IndexOp);
8419 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8420
8421 return CGF.EmitNeonCall(TblF, TblOps, Name);
8422}
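// Illustrative sketch (not part of the upstream file) of how the pairing above
// behaves for a three-register table such as vtbl3: Ops[0] and Ops[1] are
// concatenated into one 128-bit table, Ops[2] is concatenated with a zero
// vector to form the second 128-bit table, the index operand is appended, and
// a single aarch64.neon.tbl2 call is emitted. The zero padding is harmless
// because AArch64 TBL already returns 0 for out-of-range indices.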
8423
8424Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8425 unsigned Value;
8426 switch (BuiltinID) {
8427 default:
8428 return nullptr;
8429 case clang::ARM::BI__builtin_arm_nop:
8430 Value = 0;
8431 break;
8432 case clang::ARM::BI__builtin_arm_yield:
8433 case clang::ARM::BI__yield:
8434 Value = 1;
8435 break;
8436 case clang::ARM::BI__builtin_arm_wfe:
8437 case clang::ARM::BI__wfe:
8438 Value = 2;
8439 break;
8440 case clang::ARM::BI__builtin_arm_wfi:
8441 case clang::ARM::BI__wfi:
8442 Value = 3;
8443 break;
8444 case clang::ARM::BI__builtin_arm_sev:
8445 case clang::ARM::BI__sev:
8446 Value = 4;
8447 break;
8448 case clang::ARM::BI__builtin_arm_sevl:
8449 case clang::ARM::BI__sevl:
8450 Value = 5;
8451 break;
8452 }
8453
8454 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8455 llvm::ConstantInt::get(Int32Ty, Value));
8456}
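// Illustrative example (not part of the upstream file): the mapping above
// turns, e.g.,
//   __builtin_arm_wfi();          // or the MSVC-style __wfi()
// into
//   call void @llvm.arm.hint(i32 3)
// and __builtin_arm_yield() / __yield() into @llvm.arm.hint(i32 1).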
8457
8458enum SpecialRegisterAccessKind {
8459  NormalRead,
8460  VolatileRead,
8461  Write,
8462};
8463
8464// Generates the IR for __builtin_read_exec_*.
8465// Lowers the builtin to amdgcn_ballot intrinsic.
8466static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8467                                      llvm::Type *RegisterType,
8468 llvm::Type *ValueType, bool isExecHi) {
8469 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8470 CodeGen::CodeGenModule &CGM = CGF.CGM;
8471
8472 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8473 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8474
8475 if (isExecHi) {
8476 Value *Rt2 = Builder.CreateLShr(Call, 32);
8477 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8478 return Rt2;
8479 }
8480
8481 return Call;
8482}
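// Illustrative example (not part of the upstream file), assuming the 64-bit
// exec register type: a call such as
//   unsigned hi = __builtin_amdgcn_read_exec_hi();
// is emitted by the helper above as
//   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true)
//   %shift  = lshr i64 %ballot, 32
//   %hi     = trunc i64 %shift to i32
// while the low/full variants return the ballot value directly.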
8483
8484// Generates the IR for the read/write special register builtin.
8485// ValueType is the type of the value that is to be written or read;
8486// RegisterType is the type of the register being written to or read from.
8487static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8488                                         const CallExpr *E,
8489 llvm::Type *RegisterType,
8490 llvm::Type *ValueType,
8491 SpecialRegisterAccessKind AccessKind,
8492 StringRef SysReg = "") {
8493  // Read and write register intrinsics only support 32-, 64- and 128-bit operations.
8494 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8495 RegisterType->isIntegerTy(128)) &&
8496 "Unsupported size for register.");
8497
8498 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8499 CodeGen::CodeGenModule &CGM = CGF.CGM;
8500 LLVMContext &Context = CGM.getLLVMContext();
8501
8502 if (SysReg.empty()) {
8503 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8504 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8505 }
8506
8507 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8508 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8509 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8510
8511 llvm::Type *Types[] = { RegisterType };
8512
8513 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8514 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8515 && "Can't fit 64-bit value in 32-bit register");
8516
8517 if (AccessKind != Write) {
8518 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8519 llvm::Function *F = CGM.getIntrinsic(
8520 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8521 : llvm::Intrinsic::read_register,
8522 Types);
8523 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8524
8525 if (MixedTypes)
8526 // Read into 64 bit register and then truncate result to 32 bit.
8527 return Builder.CreateTrunc(Call, ValueType);
8528
8529 if (ValueType->isPointerTy())
8530 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8531 return Builder.CreateIntToPtr(Call, ValueType);
8532
8533 return Call;
8534 }
8535
8536 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8537 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8538 if (MixedTypes) {
8539 // Extend 32 bit write value to 64 bit to pass to write.
8540 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8541 return Builder.CreateCall(F, { Metadata, ArgValue });
8542 }
8543
8544 if (ValueType->isPointerTy()) {
8545 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8546 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8547 return Builder.CreateCall(F, { Metadata, ArgValue });
8548 }
8549
8550 return Builder.CreateCall(F, { Metadata, ArgValue });
8551}
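// Illustrative example (not part of the upstream file): a volatile read such as
//   unsigned v = __builtin_arm_rsr("apsr");   // "apsr" is only an example name
// becomes
//   !0 = !{!"apsr"}
//   %v = call i32 @llvm.read_volatile_register.i32(metadata !0)
// and the write/pointer variants go through the zext/trunc and
// ptrtoint/inttoptr paths above.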
8552
8553/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8554/// argument that specifies the vector type.
8555static bool HasExtraNeonArgument(unsigned BuiltinID) {
8556 switch (BuiltinID) {
8557 default: break;
8558 case NEON::BI__builtin_neon_vget_lane_i8:
8559 case NEON::BI__builtin_neon_vget_lane_i16:
8560 case NEON::BI__builtin_neon_vget_lane_bf16:
8561 case NEON::BI__builtin_neon_vget_lane_i32:
8562 case NEON::BI__builtin_neon_vget_lane_i64:
8563 case NEON::BI__builtin_neon_vget_lane_f32:
8564 case NEON::BI__builtin_neon_vgetq_lane_i8:
8565 case NEON::BI__builtin_neon_vgetq_lane_i16:
8566 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8567 case NEON::BI__builtin_neon_vgetq_lane_i32:
8568 case NEON::BI__builtin_neon_vgetq_lane_i64:
8569 case NEON::BI__builtin_neon_vgetq_lane_f32:
8570 case NEON::BI__builtin_neon_vduph_lane_bf16:
8571 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8572 case NEON::BI__builtin_neon_vset_lane_i8:
8573 case NEON::BI__builtin_neon_vset_lane_i16:
8574 case NEON::BI__builtin_neon_vset_lane_bf16:
8575 case NEON::BI__builtin_neon_vset_lane_i32:
8576 case NEON::BI__builtin_neon_vset_lane_i64:
8577 case NEON::BI__builtin_neon_vset_lane_f32:
8578 case NEON::BI__builtin_neon_vsetq_lane_i8:
8579 case NEON::BI__builtin_neon_vsetq_lane_i16:
8580 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8581 case NEON::BI__builtin_neon_vsetq_lane_i32:
8582 case NEON::BI__builtin_neon_vsetq_lane_i64:
8583 case NEON::BI__builtin_neon_vsetq_lane_f32:
8584 case NEON::BI__builtin_neon_vsha1h_u32:
8585 case NEON::BI__builtin_neon_vsha1cq_u32:
8586 case NEON::BI__builtin_neon_vsha1pq_u32:
8587 case NEON::BI__builtin_neon_vsha1mq_u32:
8588 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8589 case clang::ARM::BI_MoveToCoprocessor:
8590 case clang::ARM::BI_MoveToCoprocessor2:
8591 return false;
8592 }
8593 return true;
8594}
8595
8596Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8597 const CallExpr *E,
8598 ReturnValueSlot ReturnValue,
8599 llvm::Triple::ArchType Arch) {
8600 if (auto Hint = GetValueForARMHint(BuiltinID))
8601 return Hint;
8602
8603 if (BuiltinID == clang::ARM::BI__emit) {
8604 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8605 llvm::FunctionType *FTy =
8606 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8607
8608    Expr::EvalResult Result;
8609    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8610 llvm_unreachable("Sema will ensure that the parameter is constant");
8611
8612 llvm::APSInt Value = Result.Val.getInt();
8613 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8614
8615 llvm::InlineAsm *Emit =
8616 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8617 /*hasSideEffects=*/true)
8618 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8619 /*hasSideEffects=*/true);
8620
8621 return Builder.CreateCall(Emit);
8622 }
8623
8624 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8625 Value *Option = EmitScalarExpr(E->getArg(0));
8626 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8627 }
8628
8629 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8630 Value *Address = EmitScalarExpr(E->getArg(0));
8631 Value *RW = EmitScalarExpr(E->getArg(1));
8632 Value *IsData = EmitScalarExpr(E->getArg(2));
8633
8634 // Locality is not supported on ARM target
8635 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8636
8637 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8638 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8639 }
8640
8641 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8642 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8643 return Builder.CreateCall(
8644 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8645 }
8646
8647 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8648 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8649 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8650 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8651 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8652 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8653 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8654 return Res;
8655 }
8656
8657
8658 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8659 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8660 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8661 }
8662 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8663 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8664 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8665 "cls");
8666 }
8667
8668 if (BuiltinID == clang::ARM::BI__clear_cache) {
8669 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8670 const FunctionDecl *FD = E->getDirectCallee();
8671 Value *Ops[2];
8672 for (unsigned i = 0; i < 2; i++)
8673 Ops[i] = EmitScalarExpr(E->getArg(i));
8674 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8675 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8676 StringRef Name = FD->getName();
8677 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8678 }
8679
8680 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8681 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8682 Function *F;
8683
8684 switch (BuiltinID) {
8685 default: llvm_unreachable("unexpected builtin");
8686 case clang::ARM::BI__builtin_arm_mcrr:
8687 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8688 break;
8689 case clang::ARM::BI__builtin_arm_mcrr2:
8690 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8691 break;
8692 }
8693
8694    // The MCRR{2} instruction has 5 operands, but the
8695    // intrinsic has only 4, because Rt and Rt2 are
8696    // represented as a single unsigned 64-bit integer
8697    // in the intrinsic definition; internally, however,
8698    // they are modelled as two separate 32-bit
8699    // integers.
8700
8701 Value *Coproc = EmitScalarExpr(E->getArg(0));
8702 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8703 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8704 Value *CRm = EmitScalarExpr(E->getArg(3));
8705
8706 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8707 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8708 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8709 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8710
8711 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8712 }
8713
8714 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8715 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8716 Function *F;
8717
8718 switch (BuiltinID) {
8719 default: llvm_unreachable("unexpected builtin");
8720 case clang::ARM::BI__builtin_arm_mrrc:
8721 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8722 break;
8723 case clang::ARM::BI__builtin_arm_mrrc2:
8724 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8725 break;
8726 }
8727
8728 Value *Coproc = EmitScalarExpr(E->getArg(0));
8729 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8730 Value *CRm = EmitScalarExpr(E->getArg(2));
8731 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8732
8733    // Returns an unsigned 64-bit integer, represented
8734    // as two 32-bit integers.
8735
8736 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8737 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8738 Rt = Builder.CreateZExt(Rt, Int64Ty);
8739 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8740
8741 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8742 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8743 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8744
8745 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8746 }
8747
8748 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8749 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8750 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8751 getContext().getTypeSize(E->getType()) == 64) ||
8752 BuiltinID == clang::ARM::BI__ldrexd) {
8753 Function *F;
8754
8755 switch (BuiltinID) {
8756 default: llvm_unreachable("unexpected builtin");
8757 case clang::ARM::BI__builtin_arm_ldaex:
8758 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8759 break;
8760 case clang::ARM::BI__builtin_arm_ldrexd:
8761 case clang::ARM::BI__builtin_arm_ldrex:
8762 case clang::ARM::BI__ldrexd:
8763 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8764 break;
8765 }
8766
8767 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8768 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8769
8770 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8771 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8772 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8773 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8774
8775 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8776 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8777 Val = Builder.CreateOr(Val, Val1);
8778 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8779 }
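// Illustrative example (not part of the upstream file): a 64-bit exclusive
// load such as
//   unsigned long long v = __builtin_arm_ldrex(p);   // p: unsigned long long *
// is emitted via llvm.arm.ldrexd, which returns two i32 halves; the code above
// zero-extends both, shifts one of them left by 32 and ORs them back together
// into a single i64 before bitcasting to the expected result type.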
8780
8781 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8782 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8783 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8784
8785 QualType Ty = E->getType();
8786 llvm::Type *RealResTy = ConvertType(Ty);
8787 llvm::Type *IntTy =
8788 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8789
8790    Function *F = CGM.getIntrinsic(
8791        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8792 : Intrinsic::arm_ldrex,
8793 UnqualPtrTy);
8794 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8795 Val->addParamAttr(
8796 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8797
8798 if (RealResTy->isPointerTy())
8799 return Builder.CreateIntToPtr(Val, RealResTy);
8800 else {
8801 llvm::Type *IntResTy = llvm::IntegerType::get(
8802 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8803 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8804 RealResTy);
8805 }
8806 }
8807
8808 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8809 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8810 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8811 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8812    Function *F = CGM.getIntrinsic(
8813        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8814 : Intrinsic::arm_strexd);
8815 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8816
8817 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8818 Value *Val = EmitScalarExpr(E->getArg(0));
8819 Builder.CreateStore(Val, Tmp);
8820
8821 Address LdPtr = Tmp.withElementType(STy);
8822 Val = Builder.CreateLoad(LdPtr);
8823
8824 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8825 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8826 Value *StPtr = EmitScalarExpr(E->getArg(1));
8827 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8828 }
8829
8830 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8831 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8832 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8833 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8834
8835 QualType Ty = E->getArg(0)->getType();
8836 llvm::Type *StoreTy =
8837 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8838
8839 if (StoreVal->getType()->isPointerTy())
8840 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8841 else {
8842 llvm::Type *IntTy = llvm::IntegerType::get(
8843          getLLVMContext(),
8844          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8845 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8846 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8847 }
8848
8849    Function *F = CGM.getIntrinsic(
8850        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8851 : Intrinsic::arm_strex,
8852 StoreAddr->getType());
8853
8854 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8855 CI->addParamAttr(
8856 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8857 return CI;
8858 }
8859
8860 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8861 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8862 return Builder.CreateCall(F);
8863 }
8864
8865 // CRC32
8866 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8867 switch (BuiltinID) {
8868 case clang::ARM::BI__builtin_arm_crc32b:
8869 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8870 case clang::ARM::BI__builtin_arm_crc32cb:
8871 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8872 case clang::ARM::BI__builtin_arm_crc32h:
8873 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8874 case clang::ARM::BI__builtin_arm_crc32ch:
8875 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8876 case clang::ARM::BI__builtin_arm_crc32w:
8877 case clang::ARM::BI__builtin_arm_crc32d:
8878 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8879 case clang::ARM::BI__builtin_arm_crc32cw:
8880 case clang::ARM::BI__builtin_arm_crc32cd:
8881 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8882 }
8883
8884 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8885 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8886 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8887
8888 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8889 // intrinsics, hence we need different codegen for these cases.
8890 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8891 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8892 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8893 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8894 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8895 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8896
8897 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8898 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8899 return Builder.CreateCall(F, {Res, Arg1b});
8900 } else {
8901 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8902
8903 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8904 return Builder.CreateCall(F, {Arg0, Arg1});
8905 }
8906 }
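// Illustrative example (not part of the upstream file): for the 64-bit variant
//   uint32_t c = __builtin_arm_crc32d(crc, data);   // data is a uint64_t
// the code above splits the data into its low and high 32-bit halves and
// chains two crc32w calls:
//   %lo  = trunc i64 %data to i32
//   %shr = lshr i64 %data, 32
//   %hi  = trunc i64 %shr to i32
//   %t   = call i32 @llvm.arm.crc32w(i32 %crc, i32 %lo)
//   %c   = call i32 @llvm.arm.crc32w(i32 %t, i32 %hi)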
8907
8908 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8909 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8910 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8911 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8912 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8913 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8914
8915 SpecialRegisterAccessKind AccessKind = Write;
8916 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8917 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8918 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8919 AccessKind = VolatileRead;
8920
8921 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8922 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8923
8924 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8925 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8926
8927 llvm::Type *ValueType;
8928 llvm::Type *RegisterType;
8929 if (IsPointerBuiltin) {
8930 ValueType = VoidPtrTy;
8931 RegisterType = Int32Ty;
8932 } else if (Is64Bit) {
8933 ValueType = RegisterType = Int64Ty;
8934 } else {
8935 ValueType = RegisterType = Int32Ty;
8936 }
8937
8938 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8939 AccessKind);
8940 }
8941
8942 if (BuiltinID == ARM::BI__builtin_sponentry) {
8943 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8944 return Builder.CreateCall(F);
8945 }
8946
8947 // Handle MSVC intrinsics before argument evaluation to prevent double
8948 // evaluation.
8949 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8950 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8951
8952 // Deal with MVE builtins
8953 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8954 return Result;
8955 // Handle CDE builtins
8956 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8957 return Result;
8958
8959  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
8960 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8961 return P.first == BuiltinID;
8962 });
8963 if (It != end(NEONEquivalentIntrinsicMap))
8964 BuiltinID = It->second;
8965
8966 // Find out if any arguments are required to be integer constant
8967 // expressions.
8968 unsigned ICEArguments = 0;
8969  ASTContext::GetBuiltinTypeError Error;
8970  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8971 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8972
8973 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8974 return Builder.getInt32(addr.getAlignment().getQuantity());
8975 };
8976
8977 Address PtrOp0 = Address::invalid();
8978 Address PtrOp1 = Address::invalid();
8979  SmallVector<Value*, 4> Ops;
8980  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8981 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8982 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8983 if (i == 0) {
8984 switch (BuiltinID) {
8985 case NEON::BI__builtin_neon_vld1_v:
8986 case NEON::BI__builtin_neon_vld1q_v:
8987 case NEON::BI__builtin_neon_vld1q_lane_v:
8988 case NEON::BI__builtin_neon_vld1_lane_v:
8989 case NEON::BI__builtin_neon_vld1_dup_v:
8990 case NEON::BI__builtin_neon_vld1q_dup_v:
8991 case NEON::BI__builtin_neon_vst1_v:
8992 case NEON::BI__builtin_neon_vst1q_v:
8993 case NEON::BI__builtin_neon_vst1q_lane_v:
8994 case NEON::BI__builtin_neon_vst1_lane_v:
8995 case NEON::BI__builtin_neon_vst2_v:
8996 case NEON::BI__builtin_neon_vst2q_v:
8997 case NEON::BI__builtin_neon_vst2_lane_v:
8998 case NEON::BI__builtin_neon_vst2q_lane_v:
8999 case NEON::BI__builtin_neon_vst3_v:
9000 case NEON::BI__builtin_neon_vst3q_v:
9001 case NEON::BI__builtin_neon_vst3_lane_v:
9002 case NEON::BI__builtin_neon_vst3q_lane_v:
9003 case NEON::BI__builtin_neon_vst4_v:
9004 case NEON::BI__builtin_neon_vst4q_v:
9005 case NEON::BI__builtin_neon_vst4_lane_v:
9006 case NEON::BI__builtin_neon_vst4q_lane_v:
9007 // Get the alignment for the argument in addition to the value;
9008 // we'll use it later.
9009 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9010 Ops.push_back(PtrOp0.emitRawPointer(*this));
9011 continue;
9012 }
9013 }
9014 if (i == 1) {
9015 switch (BuiltinID) {
9016 case NEON::BI__builtin_neon_vld2_v:
9017 case NEON::BI__builtin_neon_vld2q_v:
9018 case NEON::BI__builtin_neon_vld3_v:
9019 case NEON::BI__builtin_neon_vld3q_v:
9020 case NEON::BI__builtin_neon_vld4_v:
9021 case NEON::BI__builtin_neon_vld4q_v:
9022 case NEON::BI__builtin_neon_vld2_lane_v:
9023 case NEON::BI__builtin_neon_vld2q_lane_v:
9024 case NEON::BI__builtin_neon_vld3_lane_v:
9025 case NEON::BI__builtin_neon_vld3q_lane_v:
9026 case NEON::BI__builtin_neon_vld4_lane_v:
9027 case NEON::BI__builtin_neon_vld4q_lane_v:
9028 case NEON::BI__builtin_neon_vld2_dup_v:
9029 case NEON::BI__builtin_neon_vld2q_dup_v:
9030 case NEON::BI__builtin_neon_vld3_dup_v:
9031 case NEON::BI__builtin_neon_vld3q_dup_v:
9032 case NEON::BI__builtin_neon_vld4_dup_v:
9033 case NEON::BI__builtin_neon_vld4q_dup_v:
9034 // Get the alignment for the argument in addition to the value;
9035 // we'll use it later.
9036 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9037 Ops.push_back(PtrOp1.emitRawPointer(*this));
9038 continue;
9039 }
9040 }
9041
9042 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9043 }
9044
9045 switch (BuiltinID) {
9046 default: break;
9047
9048 case NEON::BI__builtin_neon_vget_lane_i8:
9049 case NEON::BI__builtin_neon_vget_lane_i16:
9050 case NEON::BI__builtin_neon_vget_lane_i32:
9051 case NEON::BI__builtin_neon_vget_lane_i64:
9052 case NEON::BI__builtin_neon_vget_lane_bf16:
9053 case NEON::BI__builtin_neon_vget_lane_f32:
9054 case NEON::BI__builtin_neon_vgetq_lane_i8:
9055 case NEON::BI__builtin_neon_vgetq_lane_i16:
9056 case NEON::BI__builtin_neon_vgetq_lane_i32:
9057 case NEON::BI__builtin_neon_vgetq_lane_i64:
9058 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9059 case NEON::BI__builtin_neon_vgetq_lane_f32:
9060 case NEON::BI__builtin_neon_vduph_lane_bf16:
9061 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9062 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9063
9064 case NEON::BI__builtin_neon_vrndns_f32: {
9065 Value *Arg = EmitScalarExpr(E->getArg(0));
9066 llvm::Type *Tys[] = {Arg->getType()};
9067 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9068 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9069
9070 case NEON::BI__builtin_neon_vset_lane_i8:
9071 case NEON::BI__builtin_neon_vset_lane_i16:
9072 case NEON::BI__builtin_neon_vset_lane_i32:
9073 case NEON::BI__builtin_neon_vset_lane_i64:
9074 case NEON::BI__builtin_neon_vset_lane_bf16:
9075 case NEON::BI__builtin_neon_vset_lane_f32:
9076 case NEON::BI__builtin_neon_vsetq_lane_i8:
9077 case NEON::BI__builtin_neon_vsetq_lane_i16:
9078 case NEON::BI__builtin_neon_vsetq_lane_i32:
9079 case NEON::BI__builtin_neon_vsetq_lane_i64:
9080 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9081 case NEON::BI__builtin_neon_vsetq_lane_f32:
9082 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9083
9084 case NEON::BI__builtin_neon_vsha1h_u32:
9085 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9086 "vsha1h");
9087 case NEON::BI__builtin_neon_vsha1cq_u32:
9088 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9089 "vsha1h");
9090 case NEON::BI__builtin_neon_vsha1pq_u32:
9091 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9092 "vsha1h");
9093 case NEON::BI__builtin_neon_vsha1mq_u32:
9094 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9095 "vsha1h");
9096
9097 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9098 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9099 "vcvtbfp2bf");
9100 }
9101
9102 // The ARM _MoveToCoprocessor builtins put the input register value as
9103 // the first argument, but the LLVM intrinsic expects it as the third one.
9104 case clang::ARM::BI_MoveToCoprocessor:
9105 case clang::ARM::BI_MoveToCoprocessor2: {
9106 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9107 ? Intrinsic::arm_mcr
9108 : Intrinsic::arm_mcr2);
9109 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9110 Ops[3], Ops[4], Ops[5]});
9111 }
9112 }
9113
9114 // Get the last argument, which specifies the vector type.
9115 assert(HasExtraArg);
9116 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9117 std::optional<llvm::APSInt> Result =
9118      Arg->getIntegerConstantExpr(getContext());
9119  if (!Result)
9120 return nullptr;
9121
9122 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9123 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9124 // Determine the overloaded type of this builtin.
9125 llvm::Type *Ty;
9126 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9127 Ty = FloatTy;
9128 else
9129 Ty = DoubleTy;
9130
9131 // Determine whether this is an unsigned conversion or not.
9132 bool usgn = Result->getZExtValue() == 1;
9133 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9134
9135 // Call the appropriate intrinsic.
9136 Function *F = CGM.getIntrinsic(Int, Ty);
9137 return Builder.CreateCall(F, Ops, "vcvtr");
9138 }
9139
9140 // Determine the type of this overloaded NEON intrinsic.
9141 NeonTypeFlags Type = Result->getZExtValue();
9142 bool usgn = Type.isUnsigned();
9143 bool rightShift = false;
9144
9145 llvm::FixedVectorType *VTy =
9146 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9147 getTarget().hasBFloat16Type());
9148 llvm::Type *Ty = VTy;
9149 if (!Ty)
9150 return nullptr;
9151
9152 // Many NEON builtins have identical semantics and uses in ARM and
9153 // AArch64. Emit these in a single function.
9154 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9155 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9156 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9157 if (Builtin)
9158    return EmitCommonNeonBuiltinExpr(
9159        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9160 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9161
9162 unsigned Int;
9163 switch (BuiltinID) {
9164 default: return nullptr;
9165 case NEON::BI__builtin_neon_vld1q_lane_v:
9166 // Handle 64-bit integer elements as a special case. Use shuffles of
9167 // one-element vectors to avoid poor code for i64 in the backend.
9168 if (VTy->getElementType()->isIntegerTy(64)) {
9169 // Extract the other lane.
9170 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9171 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9172 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9173 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9174 // Load the value as a one-element vector.
9175 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9176 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9177 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9178 Value *Align = getAlignmentValue32(PtrOp0);
9179 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9180 // Combine them.
9181 int Indices[] = {1 - Lane, Lane};
9182 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9183 }
9184 [[fallthrough]];
9185 case NEON::BI__builtin_neon_vld1_lane_v: {
9186 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9187 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9188 Value *Ld = Builder.CreateLoad(PtrOp0);
9189 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9190 }
9191 case NEON::BI__builtin_neon_vqrshrn_n_v:
9192 Int =
9193 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9194 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9195 1, true);
9196 case NEON::BI__builtin_neon_vqrshrun_n_v:
9197 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9198 Ops, "vqrshrun_n", 1, true);
9199 case NEON::BI__builtin_neon_vqshrn_n_v:
9200 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9201 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9202 1, true);
9203 case NEON::BI__builtin_neon_vqshrun_n_v:
9204 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9205 Ops, "vqshrun_n", 1, true);
9206 case NEON::BI__builtin_neon_vrecpe_v:
9207 case NEON::BI__builtin_neon_vrecpeq_v:
9208 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9209 Ops, "vrecpe");
9210 case NEON::BI__builtin_neon_vrshrn_n_v:
9211 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9212 Ops, "vrshrn_n", 1, true);
9213 case NEON::BI__builtin_neon_vrsra_n_v:
9214 case NEON::BI__builtin_neon_vrsraq_n_v:
9215 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9216 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9217 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9218 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9219 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9220 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9221 case NEON::BI__builtin_neon_vsri_n_v:
9222 case NEON::BI__builtin_neon_vsriq_n_v:
9223 rightShift = true;
9224 [[fallthrough]];
9225 case NEON::BI__builtin_neon_vsli_n_v:
9226 case NEON::BI__builtin_neon_vsliq_n_v:
9227 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9228 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9229 Ops, "vsli_n");
9230 case NEON::BI__builtin_neon_vsra_n_v:
9231 case NEON::BI__builtin_neon_vsraq_n_v:
9232 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9233 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9234 return Builder.CreateAdd(Ops[0], Ops[1]);
9235 case NEON::BI__builtin_neon_vst1q_lane_v:
9236 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9237 // a one-element vector and avoid poor code for i64 in the backend.
9238 if (VTy->getElementType()->isIntegerTy(64)) {
9239 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9240 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9241 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9242 Ops[2] = getAlignmentValue32(PtrOp0);
9243 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9244 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9245 Tys), Ops);
9246 }
9247 [[fallthrough]];
9248 case NEON::BI__builtin_neon_vst1_lane_v: {
9249 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9250 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9251 return Builder.CreateStore(Ops[1],
9252 PtrOp0.withElementType(Ops[1]->getType()));
9253 }
9254 case NEON::BI__builtin_neon_vtbl1_v:
9255 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9256 Ops, "vtbl1");
9257 case NEON::BI__builtin_neon_vtbl2_v:
9258 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9259 Ops, "vtbl2");
9260 case NEON::BI__builtin_neon_vtbl3_v:
9261 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9262 Ops, "vtbl3");
9263 case NEON::BI__builtin_neon_vtbl4_v:
9264 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9265 Ops, "vtbl4");
9266 case NEON::BI__builtin_neon_vtbx1_v:
9267 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9268 Ops, "vtbx1");
9269 case NEON::BI__builtin_neon_vtbx2_v:
9270 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9271 Ops, "vtbx2");
9272 case NEON::BI__builtin_neon_vtbx3_v:
9273 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9274 Ops, "vtbx3");
9275 case NEON::BI__builtin_neon_vtbx4_v:
9276 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9277 Ops, "vtbx4");
9278 }
9279}
9280
9281template<typename Integer>
9282static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9283  return E->getIntegerConstantExpr(Context)->getExtValue();
9284}
9285
9286static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9287 llvm::Type *T, bool Unsigned) {
9288 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9289 // which finds it convenient to specify signed/unsigned as a boolean flag.
9290 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9291}
9292
9293static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9294 uint32_t Shift, bool Unsigned) {
9295 // MVE helper function for integer shift right. This must handle signed vs
9296 // unsigned, and also deal specially with the case where the shift count is
9297 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9298 // undefined behavior, but in MVE it's legal, so we must convert it to code
9299 // that is not undefined in IR.
9300 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9301 ->getElementType()
9302 ->getPrimitiveSizeInBits();
9303 if (Shift == LaneBits) {
9304 // An unsigned shift of the full lane size always generates zero, so we can
9305 // simply emit a zero vector. A signed shift of the full lane size does the
9306 // same thing as shifting by one bit fewer.
9307 if (Unsigned)
9308 return llvm::Constant::getNullValue(V->getType());
9309 else
9310 --Shift;
9311 }
9312 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9313}
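// Illustrative example (not part of the upstream file): for 32-bit lanes,
//   MVEImmediateShr(Builder, V, /*Shift=*/32, /*Unsigned=*/true)
//     -> zeroinitializer of V's type
//   MVEImmediateShr(Builder, V, /*Shift=*/32, /*Unsigned=*/false)
//     -> ashr <4 x i32> V, <i32 31, i32 31, i32 31, i32 31>
// Both match the MVE semantics of a full-lane-width shift without emitting a
// shift amount that would be poison in IR.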
9314
9315static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9316 // MVE-specific helper function for a vector splat, which infers the element
9317 // count of the output vector by knowing that MVE vectors are all 128 bits
9318 // wide.
9319 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9320 return Builder.CreateVectorSplat(Elements, V);
9321}
9322
9323static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9324 CodeGenFunction *CGF,
9325 llvm::Value *V,
9326 llvm::Type *DestType) {
9327 // Convert one MVE vector type into another by reinterpreting its in-register
9328 // format.
9329 //
9330  // In little-endian mode, this is identical to a bitcast (which reinterprets
9331  // the memory format). But in big-endian mode they're not necessarily the
9332  // same, because the register and memory formats map to each other
9333  // differently depending on the lane size.
9334 //
9335 // We generate a bitcast whenever we can (if we're little-endian, or if the
9336 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9337 // that performs the different kind of reinterpretation.
9338 if (CGF->getTarget().isBigEndian() &&
9339 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9340 return Builder.CreateCall(
9341 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9342 {DestType, V->getType()}),
9343 V);
9344 } else {
9345 return Builder.CreateBitCast(V, DestType);
9346 }
9347}
9348
9349static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9350 // Make a shufflevector that extracts every other element of a vector (evens
9351 // or odds, as desired).
9352 SmallVector<int, 16> Indices;
9353 unsigned InputElements =
9354 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9355 for (unsigned i = 0; i < InputElements; i += 2)
9356 Indices.push_back(i + Odd);
9357 return Builder.CreateShuffleVector(V, Indices);
9358}
9359
9360static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9361 llvm::Value *V1) {
9362 // Make a shufflevector that interleaves two vectors element by element.
9363 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9364 SmallVector<int, 16> Indices;
9365 unsigned InputElements =
9366 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9367 for (unsigned i = 0; i < InputElements; i++) {
9368 Indices.push_back(i);
9369 Indices.push_back(i + InputElements);
9370 }
9371 return Builder.CreateShuffleVector(V0, V1, Indices);
9372}
9373
9374template<unsigned HighBit, unsigned OtherBits>
9375static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9376 // MVE-specific helper function to make a vector splat of a constant such as
9377 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9378 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9379 unsigned LaneBits = T->getPrimitiveSizeInBits();
9380 uint32_t Value = HighBit << (LaneBits - 1);
9381 if (OtherBits)
9382 Value |= (1UL << (LaneBits - 1)) - 1;
9383 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9384 return ARMMVEVectorSplat(Builder, Lane);
9385}
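// Illustrative example (not part of the upstream file): with 32-bit lanes,
//   ARMMVEConstantSplat<1, 0>(Builder, VT)  splats 0x80000000 (INT_MIN),
//   ARMMVEConstantSplat<0, 1>(Builder, VT)  splats 0x7fffffff (INT_MAX), and
//   ARMMVEConstantSplat<1, 1>(Builder, VT)  splats 0xffffffff (UINT_MAX),
// each as a <4 x i32> vector covering the full 128-bit MVE register.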
9386
9387static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9388 llvm::Value *V,
9389 unsigned ReverseWidth) {
9390 // MVE-specific helper function which reverses the elements of a
9391 // vector within every (ReverseWidth)-bit collection of lanes.
9392 SmallVector<int, 16> Indices;
9393 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9394 unsigned Elements = 128 / LaneSize;
9395 unsigned Mask = ReverseWidth / LaneSize - 1;
9396 for (unsigned i = 0; i < Elements; i++)
9397 Indices.push_back(i ^ Mask);
9398 return Builder.CreateShuffleVector(V, Indices);
9399}
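// Illustrative example (not part of the upstream file): with 8-bit lanes and
// ReverseWidth == 32, Mask is 3 and the shuffle indices become
//   <3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12>
// i.e. the element order is reversed within each 32-bit chunk, which is the
// behaviour expected of a vrev32-style builtin.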
9400
9401Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9402                                              const CallExpr *E,
9403 ReturnValueSlot ReturnValue,
9404 llvm::Triple::ArchType Arch) {
9405 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9406 Intrinsic::ID IRIntr;
9407 unsigned NumVectors;
9408
9409 // Code autogenerated by Tablegen will handle all the simple builtins.
9410 switch (BuiltinID) {
9411 #include "clang/Basic/arm_mve_builtin_cg.inc"
9412
9413 // If we didn't match an MVE builtin id at all, go back to the
9414 // main EmitARMBuiltinExpr.
9415 default:
9416 return nullptr;
9417 }
9418
9419 // Anything that breaks from that switch is an MVE builtin that
9420 // needs handwritten code to generate.
9421
9422 switch (CustomCodeGenType) {
9423
9424 case CustomCodeGen::VLD24: {
9425    llvm::SmallVector<Value *, 4> Ops;
9426    llvm::SmallVector<llvm::Type *, 4> Tys;
9427
9428 auto MvecCType = E->getType();
9429 auto MvecLType = ConvertType(MvecCType);
9430 assert(MvecLType->isStructTy() &&
9431 "Return type for vld[24]q should be a struct");
9432 assert(MvecLType->getStructNumElements() == 1 &&
9433 "Return-type struct for vld[24]q should have one element");
9434 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9435 assert(MvecLTypeInner->isArrayTy() &&
9436 "Return-type struct for vld[24]q should contain an array");
9437 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9438 "Array member of return-type struct vld[24]q has wrong length");
9439 auto VecLType = MvecLTypeInner->getArrayElementType();
9440
9441 Tys.push_back(VecLType);
9442
9443 auto Addr = E->getArg(0);
9444 Ops.push_back(EmitScalarExpr(Addr));
9445 Tys.push_back(ConvertType(Addr->getType()));
9446
9447 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9448 Value *LoadResult = Builder.CreateCall(F, Ops);
9449 Value *MvecOut = PoisonValue::get(MvecLType);
9450 for (unsigned i = 0; i < NumVectors; ++i) {
9451 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9452 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9453 }
9454
9455 if (ReturnValue.isNull())
9456 return MvecOut;
9457 else
9458 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9459 }
9460
9461 case CustomCodeGen::VST24: {
9462    llvm::SmallVector<Value *, 4> Ops;
9463    llvm::SmallVector<llvm::Type *, 4> Tys;
9464
9465 auto Addr = E->getArg(0);
9466 Ops.push_back(EmitScalarExpr(Addr));
9467 Tys.push_back(ConvertType(Addr->getType()));
9468
9469 auto MvecCType = E->getArg(1)->getType();
9470 auto MvecLType = ConvertType(MvecCType);
9471 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9472 assert(MvecLType->getStructNumElements() == 1 &&
9473 "Data-type struct for vst2q should have one element");
9474 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9475 assert(MvecLTypeInner->isArrayTy() &&
9476 "Data-type struct for vst2q should contain an array");
9477 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9478 "Array member of return-type struct vld[24]q has wrong length");
9479 auto VecLType = MvecLTypeInner->getArrayElementType();
9480
9481 Tys.push_back(VecLType);
9482
9483 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9484 EmitAggExpr(E->getArg(1), MvecSlot);
9485 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9486 for (unsigned i = 0; i < NumVectors; i++)
9487 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9488
9489 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9490 Value *ToReturn = nullptr;
9491 for (unsigned i = 0; i < NumVectors; i++) {
9492 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9493 ToReturn = Builder.CreateCall(F, Ops);
9494 Ops.pop_back();
9495 }
9496 return ToReturn;
9497 }
9498 }
9499 llvm_unreachable("unknown custom codegen type.");
9500}
9501
9502Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9503                                              const CallExpr *E,
9504 ReturnValueSlot ReturnValue,
9505 llvm::Triple::ArchType Arch) {
9506 switch (BuiltinID) {
9507 default:
9508 return nullptr;
9509#include "clang/Basic/arm_cde_builtin_cg.inc"
9510 }
9511}
9512
9513static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9514 const CallExpr *E,
9515                                        SmallVectorImpl<Value *> &Ops,
9516                                        llvm::Triple::ArchType Arch) {
9517 unsigned int Int = 0;
9518 const char *s = nullptr;
9519
9520 switch (BuiltinID) {
9521 default:
9522 return nullptr;
9523 case NEON::BI__builtin_neon_vtbl1_v:
9524 case NEON::BI__builtin_neon_vqtbl1_v:
9525 case NEON::BI__builtin_neon_vqtbl1q_v:
9526 case NEON::BI__builtin_neon_vtbl2_v:
9527 case NEON::BI__builtin_neon_vqtbl2_v:
9528 case NEON::BI__builtin_neon_vqtbl2q_v:
9529 case NEON::BI__builtin_neon_vtbl3_v:
9530 case NEON::BI__builtin_neon_vqtbl3_v:
9531 case NEON::BI__builtin_neon_vqtbl3q_v:
9532 case NEON::BI__builtin_neon_vtbl4_v:
9533 case NEON::BI__builtin_neon_vqtbl4_v:
9534 case NEON::BI__builtin_neon_vqtbl4q_v:
9535 break;
9536 case NEON::BI__builtin_neon_vtbx1_v:
9537 case NEON::BI__builtin_neon_vqtbx1_v:
9538 case NEON::BI__builtin_neon_vqtbx1q_v:
9539 case NEON::BI__builtin_neon_vtbx2_v:
9540 case NEON::BI__builtin_neon_vqtbx2_v:
9541 case NEON::BI__builtin_neon_vqtbx2q_v:
9542 case NEON::BI__builtin_neon_vtbx3_v:
9543 case NEON::BI__builtin_neon_vqtbx3_v:
9544 case NEON::BI__builtin_neon_vqtbx3q_v:
9545 case NEON::BI__builtin_neon_vtbx4_v:
9546 case NEON::BI__builtin_neon_vqtbx4_v:
9547 case NEON::BI__builtin_neon_vqtbx4q_v:
9548 break;
9549 }
9550
9551 assert(E->getNumArgs() >= 3);
9552
9553 // Get the last argument, which specifies the vector type.
9554 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9555 std::optional<llvm::APSInt> Result =
9556      Arg->getIntegerConstantExpr(CGF.getContext());
9557  if (!Result)
9558 return nullptr;
9559
9560 // Determine the type of this overloaded NEON intrinsic.
9561 NeonTypeFlags Type = Result->getZExtValue();
9562 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9563 if (!Ty)
9564 return nullptr;
9565
9566 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9567
9568  // AArch64 scalar builtins are not overloaded; they do not have an extra
9569  // argument that specifies the vector type, so each case must be handled separately.
9570 switch (BuiltinID) {
9571 case NEON::BI__builtin_neon_vtbl1_v: {
9572 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9573 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9574 }
9575 case NEON::BI__builtin_neon_vtbl2_v: {
9576 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9577 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9578 }
9579 case NEON::BI__builtin_neon_vtbl3_v: {
9580 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9581 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9582 }
9583 case NEON::BI__builtin_neon_vtbl4_v: {
9584 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9585 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9586 }
9587 case NEON::BI__builtin_neon_vtbx1_v: {
9588 Value *TblRes =
9589 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9590 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9591
9592 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9593 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9594 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9595
9596 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9597 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9598 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9599 }
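// Illustrative note (not part of the upstream file) on the vtbx1 expansion
// above: TBL1 already returns 0 for indices >= 16, but vtbx1 must return the
// corresponding lane of the fallback vector (Ops[0]) for indices >= 8, so the
// compare/select-by-mask sequence keeps table results only where the index is
// in range:
//   %oob  = icmp uge <8 x i8> %idx, <i8 8, ...>
//   %mask = sext <8 x i1> %oob to <8 x i8>
//   %res  = or (and %mask, %fallback), (and (not %mask), %tbl)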
9600 case NEON::BI__builtin_neon_vtbx2_v: {
9601 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9602 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9603 }
9604 case NEON::BI__builtin_neon_vtbx3_v: {
9605 Value *TblRes =
9606 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9607 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9608
9609 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9610 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9611 TwentyFourV);
9612 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9613
9614 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9615 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9616 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9617 }
9618 case NEON::BI__builtin_neon_vtbx4_v: {
9619 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9620 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9621 }
9622 case NEON::BI__builtin_neon_vqtbl1_v:
9623 case NEON::BI__builtin_neon_vqtbl1q_v:
9624 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9625 case NEON::BI__builtin_neon_vqtbl2_v:
9626 case NEON::BI__builtin_neon_vqtbl2q_v: {
9627 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9628 case NEON::BI__builtin_neon_vqtbl3_v:
9629 case NEON::BI__builtin_neon_vqtbl3q_v:
9630 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9631 case NEON::BI__builtin_neon_vqtbl4_v:
9632 case NEON::BI__builtin_neon_vqtbl4q_v:
9633 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9634 case NEON::BI__builtin_neon_vqtbx1_v:
9635 case NEON::BI__builtin_neon_vqtbx1q_v:
9636 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9637 case NEON::BI__builtin_neon_vqtbx2_v:
9638 case NEON::BI__builtin_neon_vqtbx2q_v:
9639 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9640 case NEON::BI__builtin_neon_vqtbx3_v:
9641 case NEON::BI__builtin_neon_vqtbx3q_v:
9642 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9643 case NEON::BI__builtin_neon_vqtbx4_v:
9644 case NEON::BI__builtin_neon_vqtbx4q_v:
9645 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9646 }
9647 }
9648
9649 if (!Int)
9650 return nullptr;
9651
9652 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9653 return CGF.EmitNeonCall(F, Ops, s);
9654}
9655
9656 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9657 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9658 Op = Builder.CreateBitCast(Op, Int16Ty);
9659 Value *V = PoisonValue::get(VTy);
9660 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9661 Op = Builder.CreateInsertElement(V, Op, CI);
9662 return Op;
9663}
9664
9665/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9666/// access builtin. Only required if it can't be inferred from the base pointer
9667/// operand.
9668llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9669 switch (TypeFlags.getMemEltType()) {
9670 case SVETypeFlags::MemEltTyDefault:
9671 return getEltType(TypeFlags);
9672 case SVETypeFlags::MemEltTyInt8:
9673 return Builder.getInt8Ty();
9674 case SVETypeFlags::MemEltTyInt16:
9675 return Builder.getInt16Ty();
9676 case SVETypeFlags::MemEltTyInt32:
9677 return Builder.getInt32Ty();
9678 case SVETypeFlags::MemEltTyInt64:
9679 return Builder.getInt64Ty();
9680 }
9681 llvm_unreachable("Unknown MemEltType");
9682}
9683
9684llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9685 switch (TypeFlags.getEltType()) {
9686 default:
9687 llvm_unreachable("Invalid SVETypeFlag!");
9688
9689 case SVETypeFlags::EltTyInt8:
9690 return Builder.getInt8Ty();
9691 case SVETypeFlags::EltTyInt16:
9692 return Builder.getInt16Ty();
9693 case SVETypeFlags::EltTyInt32:
9694 return Builder.getInt32Ty();
9695 case SVETypeFlags::EltTyInt64:
9696 return Builder.getInt64Ty();
9697 case SVETypeFlags::EltTyInt128:
9698 return Builder.getInt128Ty();
9699
9700 case SVETypeFlags::EltTyFloat16:
9701 return Builder.getHalfTy();
9702 case SVETypeFlags::EltTyFloat32:
9703 return Builder.getFloatTy();
9704 case SVETypeFlags::EltTyFloat64:
9705 return Builder.getDoubleTy();
9706
9707 case SVETypeFlags::EltTyBFloat16:
9708 return Builder.getBFloatTy();
9709
9710 case SVETypeFlags::EltTyBool8:
9711 case SVETypeFlags::EltTyBool16:
9712 case SVETypeFlags::EltTyBool32:
9713 case SVETypeFlags::EltTyBool64:
9714 return Builder.getInt1Ty();
9715 }
9716}
9717
9718// Return the llvm predicate vector type corresponding to the specified element
9719// TypeFlags.
9720llvm::ScalableVectorType *
9721 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9722 switch (TypeFlags.getEltType()) {
9723 default: llvm_unreachable("Unhandled SVETypeFlag!");
9724
9725 case SVETypeFlags::EltTyInt8:
9726 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9727 case SVETypeFlags::EltTyInt16:
9728 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9729 case SVETypeFlags::EltTyInt32:
9730 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9731 case SVETypeFlags::EltTyInt64:
9732 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9733
9734 case SVETypeFlags::EltTyBFloat16:
9735 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9736 case SVETypeFlags::EltTyFloat16:
9737 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9738 case SVETypeFlags::EltTyFloat32:
9739 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9740 case SVETypeFlags::EltTyFloat64:
9741 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9742
9743 case SVETypeFlags::EltTyBool8:
9744 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9745 case SVETypeFlags::EltTyBool16:
9746 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9747 case SVETypeFlags::EltTyBool32:
9748 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9749 case SVETypeFlags::EltTyBool64:
9750 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9751 }
9752}
9753
9754// Return the llvm vector type corresponding to the specified element TypeFlags.
9755llvm::ScalableVectorType *
9756CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9757 switch (TypeFlags.getEltType()) {
9758 default:
9759 llvm_unreachable("Invalid SVETypeFlag!");
9760
9761 case SVETypeFlags::EltTyInt8:
9762 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9763 case SVETypeFlags::EltTyInt16:
9764 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9765 case SVETypeFlags::EltTyInt32:
9766 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9767 case SVETypeFlags::EltTyInt64:
9768 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9769
9770 case SVETypeFlags::EltTyFloat16:
9771 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9772 case SVETypeFlags::EltTyBFloat16:
9773 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9774 case SVETypeFlags::EltTyFloat32:
9775 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9776 case SVETypeFlags::EltTyFloat64:
9777 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9778
9779 case SVETypeFlags::EltTyBool8:
9780 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9781 case SVETypeFlags::EltTyBool16:
9782 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9783 case SVETypeFlags::EltTyBool32:
9784 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9785 case SVETypeFlags::EltTyBool64:
9786 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9787 }
9788}
9789
9790llvm::Value *
9791 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9792 Function *Ptrue =
9793 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9794 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9795}
9796
9797constexpr unsigned SVEBitsPerBlock = 128;
9798
9799static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9800 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9801 return llvm::ScalableVectorType::get(EltTy, NumElts);
9802}
9803
9804// Reinterpret the input predicate so that it can be used to correctly isolate
9805// the elements of the specified datatype.
9806 llvm::Value *CodeGenFunction::EmitSVEPredicateCast(llvm::Value *Pred,
9807 llvm::ScalableVectorType *VTy) {
9808
9809 if (isa<TargetExtType>(Pred->getType()) &&
9810 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9811 return Pred;
9812
9813 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9814 if (Pred->getType() == RTy)
9815 return Pred;
9816
9817 unsigned IntID;
9818 llvm::Type *IntrinsicTy;
9819 switch (VTy->getMinNumElements()) {
9820 default:
9821 llvm_unreachable("unsupported element count!");
9822 case 1:
9823 case 2:
9824 case 4:
9825 case 8:
9826 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9827 IntrinsicTy = RTy;
9828 break;
9829 case 16:
9830 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9831 IntrinsicTy = Pred->getType();
9832 break;
9833 }
9834
9835 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9836 Value *C = Builder.CreateCall(F, Pred);
9837 assert(C->getType() == RTy && "Unexpected return type!");
9838 return C;
9839}
9840
9841 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9842 SmallVectorImpl<Value *> &Ops,
9843 unsigned IntID) {
9844 auto *ResultTy = getSVEType(TypeFlags);
9845 auto *OverloadedTy =
9846 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9847
9848 Function *F = nullptr;
9849 if (Ops[1]->getType()->isVectorTy())
9850 // This is the "vector base, scalar offset" case. In order to uniquely
9851 // map this built-in to an LLVM IR intrinsic, we need both the return type
9852 // and the type of the vector base.
9853 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9854 else
9855 // This is the "scalar base, vector offset" case. The type of the offset
9856 // is encoded in the name of the intrinsic. We only need to specify the
9857 // return type in order to uniquely map this built-in to an LLVM IR
9858 // intrinsic.
9859 F = CGM.getIntrinsic(IntID, OverloadedTy);
9860
9861 // At the ACLE level there's only one predicate type, svbool_t, which is
9862 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9863 // actual type being loaded. For example, when loading doubles (i64) the
9864 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9865 // the predicate and the data being loaded must match. Cast to the type
9866 // expected by the intrinsic. The intrinsic itself should be defined in
9867 // a way that enforces relations between parameter types.
9868 Ops[0] = EmitSVEPredicateCast(
9869 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9870
9871 // Pass 0 when the offset is missing. This can only be applied when using
9872 // the "vector base" addressing mode for which ACLE allows no offset. The
9873 // corresponding LLVM IR always requires an offset.
9874 if (Ops.size() == 2) {
9875 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9876 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9877 }
9878
9879 // For "vector base, scalar index" scale the index so that it becomes a
9880 // scalar offset.
9881 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9882 unsigned BytesPerElt =
9883 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9884 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9885 }
9886
9887 Value *Call = Builder.CreateCall(F, Ops);
9888
9889 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9890 // other cases it's folded into a nop.
9891 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9892 : Builder.CreateSExt(Call, ResultTy);
9893}
9894
9895 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9896 SmallVectorImpl<Value *> &Ops,
9897 unsigned IntID) {
9898 auto *SrcDataTy = getSVEType(TypeFlags);
9899 auto *OverloadedTy =
9900 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9901
9902 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9903 // it's the first argument. Move it accordingly.
9904 Ops.insert(Ops.begin(), Ops.pop_back_val());
9905
9906 Function *F = nullptr;
9907 if (Ops[2]->getType()->isVectorTy())
9908 // This is the "vector base, scalar offset" case. In order to uniquely
9909 // map this built-in to an LLVM IR intrinsic, we need both the return type
9910 // and the type of the vector base.
9911 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9912 else
9913 // This is the "scalar base, vector offset" case. The type of the offset
9914 // is encoded in the name of the intrinsic. We only need to specify the
9915 // return type in order to uniquely map this built-in to an LLVM IR
9916 // intrinsic.
9917 F = CGM.getIntrinsic(IntID, OverloadedTy);
9918
9919 // Pass 0 when the offset is missing. This can only be applied when using
9920 // the "vector base" addressing mode for which ACLE allows no offset. The
9921 // corresponding LLVM IR always requires an offset.
9922 if (Ops.size() == 3) {
9923 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9924 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9925 }
9926
9927 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9928 // folded into a nop.
9929 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9930
9931 // At the ACLE level there's only one predicate type, svbool_t, which is
9932 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9933 // actual type being stored. For example, when storing doubles (i64) the
9934 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9935 // the predicate and the data being stored must match. Cast to the type
9936 // expected by the intrinsic. The intrinsic itself should be defined in
9937 // a way that enforces relations between parameter types.
9938 Ops[1] = EmitSVEPredicateCast(
9939 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9940
9941 // For "vector base, scalar index" scale the index so that it becomes a
9942 // scalar offset.
9943 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9944 unsigned BytesPerElt =
9945 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9946 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9947 }
9948
9949 return Builder.CreateCall(F, Ops);
9950}
9951
9952 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9953 SmallVectorImpl<Value *> &Ops,
9954 unsigned IntID) {
9955 // The gather prefetches are overloaded on the vector input - this can either
9956 // be the vector of base addresses or vector of offsets.
9957 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9958 if (!OverloadedTy)
9959 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9960
9961 // Cast the predicate from svbool_t to the right number of elements.
9962 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9963
9964 // vector + imm addressing modes
9965 if (Ops[1]->getType()->isVectorTy()) {
9966 if (Ops.size() == 3) {
9967 // Pass 0 for 'vector+imm' when the index is omitted.
9968 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9969
9970 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9971 std::swap(Ops[2], Ops[3]);
9972 } else {
9973 // Index needs to be passed as scaled offset.
9974 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9975 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9976 if (BytesPerElt > 1)
9977 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9978 }
9979 }
9980
9981 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9982 return Builder.CreateCall(F, Ops);
9983}
9984
9985 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9986 SmallVectorImpl<Value *> &Ops,
9987 unsigned IntID) {
9988 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9989
9990 unsigned N;
9991 switch (IntID) {
9992 case Intrinsic::aarch64_sve_ld2_sret:
9993 case Intrinsic::aarch64_sve_ld1_pn_x2:
9994 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9995 case Intrinsic::aarch64_sve_ld2q_sret:
9996 N = 2;
9997 break;
9998 case Intrinsic::aarch64_sve_ld3_sret:
9999 case Intrinsic::aarch64_sve_ld3q_sret:
10000 N = 3;
10001 break;
10002 case Intrinsic::aarch64_sve_ld4_sret:
10003 case Intrinsic::aarch64_sve_ld1_pn_x4:
10004 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
10005 case Intrinsic::aarch64_sve_ld4q_sret:
10006 N = 4;
10007 break;
10008 default:
10009 llvm_unreachable("unknown intrinsic!");
10010 }
10011 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10012 VTy->getElementCount() * N);
10013
10014 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10015 Value *BasePtr = Ops[1];
10016
10017 // Does the load have an offset?
10018 if (Ops.size() > 2)
10019 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10020
10021 Function *F = CGM.getIntrinsic(IntID, {VTy});
10022 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10023 unsigned MinElts = VTy->getMinNumElements();
10024 Value *Ret = llvm::PoisonValue::get(RetTy);
10025 for (unsigned I = 0; I < N; I++) {
10026 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10027 Value *SRet = Builder.CreateExtractValue(Call, I);
10028 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10029 }
10030 return Ret;
10031}
10032
10033 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10034 SmallVectorImpl<Value*> &Ops,
10035 unsigned IntID) {
10036 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10037
10038 unsigned N;
10039 switch (IntID) {
10040 case Intrinsic::aarch64_sve_st2:
10041 case Intrinsic::aarch64_sve_st1_pn_x2:
10042 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10043 case Intrinsic::aarch64_sve_st2q:
10044 N = 2;
10045 break;
10046 case Intrinsic::aarch64_sve_st3:
10047 case Intrinsic::aarch64_sve_st3q:
10048 N = 3;
10049 break;
10050 case Intrinsic::aarch64_sve_st4:
10051 case Intrinsic::aarch64_sve_st1_pn_x4:
10052 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10053 case Intrinsic::aarch64_sve_st4q:
10054 N = 4;
10055 break;
10056 default:
10057 llvm_unreachable("unknown intrinsic!");
10058 }
10059
10060 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10061 Value *BasePtr = Ops[1];
10062
10063 // Does the store have an offset?
10064 if (Ops.size() > (2 + N))
10065 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10066
10067 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10068 // need to break up the tuple vector.
10069 SmallVector<llvm::Value*, 5> Operands;
10070 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10071 Operands.push_back(Ops[I]);
10072 Operands.append({Predicate, BasePtr});
10073 Function *F = CGM.getIntrinsic(IntID, { VTy });
10074
10075 return Builder.CreateCall(F, Operands);
10076}
10077
10078// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10079// svpmullt_pair intrinsics, with the exception that their results are bitcast
10080// to a wider type.
10081 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10082 SmallVectorImpl<Value *> &Ops,
10083 unsigned BuiltinID) {
10084 // Splat scalar operand to vector (intrinsics with _n infix)
10085 if (TypeFlags.hasSplatOperand()) {
10086 unsigned OpNo = TypeFlags.getSplatOperand();
10087 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10088 }
10089
10090 // The pair-wise function has a narrower overloaded type.
10091 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10092 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10093
10094 // Now bitcast to the wider result type.
10095 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10096 return EmitSVEReinterpret(Call, Ty);
10097}
10098
10099 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10100 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10101 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10102 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10103 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10104}
10105
10106 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10107 SmallVectorImpl<Value *> &Ops,
10108 unsigned BuiltinID) {
10109 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10110 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10111 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10112
10113 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10114 Value *BasePtr = Ops[1];
10115
10116 // Implement the index operand if not omitted.
10117 if (Ops.size() > 3)
10118 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10119
10120 Value *PrfOp = Ops.back();
10121
10122 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10123 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10124}
10125
10126 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10127 llvm::Type *ReturnTy,
10128 SmallVectorImpl<Value *> &Ops,
10129 unsigned IntrinsicID,
10130 bool IsZExtReturn) {
10131 QualType LangPTy = E->getArg(1)->getType();
10132 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10133 LangPTy->castAs<PointerType>()->getPointeeType());
10134
10135 // The vector type that is returned may be different from the
10136 // eventual type loaded from memory.
10137 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10138 llvm::ScalableVectorType *MemoryTy = nullptr;
10139 llvm::ScalableVectorType *PredTy = nullptr;
10140 bool IsQuadLoad = false;
10141 switch (IntrinsicID) {
10142 case Intrinsic::aarch64_sve_ld1uwq:
10143 case Intrinsic::aarch64_sve_ld1udq:
10144 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10145 PredTy = llvm::ScalableVectorType::get(
10146 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10147 IsQuadLoad = true;
10148 break;
10149 default:
10150 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10151 PredTy = MemoryTy;
10152 break;
10153 }
10154
10155 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10156 Value *BasePtr = Ops[1];
10157
10158 // Does the load have an offset?
10159 if (Ops.size() > 2)
10160 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10161
10162 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10163 auto *Load =
10164 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10165 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10166 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10167
10168 if (IsQuadLoad)
10169 return Load;
10170
10171 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10172 : Builder.CreateSExt(Load, VectorTy);
10173}
10174
10175 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10176 SmallVectorImpl<Value *> &Ops,
10177 unsigned IntrinsicID) {
10178 QualType LangPTy = E->getArg(1)->getType();
10179 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10180 LangPTy->castAs<PointerType>()->getPointeeType());
10181
10182 // The vector type that is stored may be different from the
10183 // eventual type stored to memory.
10184 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10185 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10186
10187 auto PredTy = MemoryTy;
10188 auto AddrMemoryTy = MemoryTy;
10189 bool IsQuadStore = false;
10190
10191 switch (IntrinsicID) {
10192 case Intrinsic::aarch64_sve_st1wq:
10193 case Intrinsic::aarch64_sve_st1dq:
10194 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10195 PredTy =
10196 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10197 IsQuadStore = true;
10198 break;
10199 default:
10200 break;
10201 }
10202 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10203 Value *BasePtr = Ops[1];
10204
10205 // Does the store have an offset?
10206 if (Ops.size() == 4)
10207 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10208
10209 // Last value is always the data
10210 Value *Val =
10211 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10212
10213 Function *F =
10214 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10215 auto *Store =
10216 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10217 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10218 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10219 return Store;
10220}
10221
10222 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10223 SmallVectorImpl<Value *> &Ops,
10224 unsigned IntID) {
10225 Ops[2] = EmitSVEPredicateCast(
10226 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10227
10228 SmallVector<Value *> NewOps;
10229 NewOps.push_back(Ops[2]);
10230
10231 llvm::Value *BasePtr = Ops[3];
10232
10233 // If the intrinsic contains the vnum parameter, multiply it with the vector
10234 // size in bytes.
10235 if (Ops.size() == 5) {
10236 Function *StreamingVectorLength =
10237 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10238 llvm::Value *StreamingVectorLengthCall =
10239 Builder.CreateCall(StreamingVectorLength);
10240 llvm::Value *Mulvl =
10241 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10242 // The type of the ptr parameter is void *, so use Int8Ty here.
10243 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10244 }
10245 NewOps.push_back(BasePtr);
10246 NewOps.push_back(Ops[0]);
10247 NewOps.push_back(Ops[1]);
10248 Function *F = CGM.getIntrinsic(IntID);
10249 return Builder.CreateCall(F, NewOps);
10250}
10251
10252 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10253 SmallVectorImpl<Value *> &Ops,
10254 unsigned IntID) {
10255 auto *VecTy = getSVEType(TypeFlags);
10256 Function *F = CGM.getIntrinsic(IntID, VecTy);
10257 if (TypeFlags.isReadZA())
10258 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10259 else if (TypeFlags.isWriteZA())
10260 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10261 return Builder.CreateCall(F, Ops);
10262}
10263
10264 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10265 SmallVectorImpl<Value *> &Ops,
10266 unsigned IntID) {
10267 // The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
10268 if (Ops.size() == 0)
10269 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10270 Function *F = CGM.getIntrinsic(IntID, {});
10271 return Builder.CreateCall(F, Ops);
10272}
10273
10274 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10275 SmallVectorImpl<Value *> &Ops,
10276 unsigned IntID) {
10277 if (Ops.size() == 2)
10278 Ops.push_back(Builder.getInt32(0));
10279 else
10280 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10281 Function *F = CGM.getIntrinsic(IntID, {});
10282 return Builder.CreateCall(F, Ops);
10283}
10284
10285// Limit the usage of scalable llvm IR generated by the ACLE by using the
10286// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10287Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10288 return Builder.CreateVectorSplat(
10289 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10290}
10291
10292 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10293 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10294}
10295
10296Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10297 // FIXME: For big endian this needs an additional REV, or needs a separate
10298 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10299 // instruction is defined as 'bitwise' equivalent from memory point of
10300 // view (when storing/reloading), whereas the svreinterpret builtin
10301 // implements bitwise equivalent cast from register point of view.
10302 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10303 return Builder.CreateBitCast(Val, Ty);
10304}
10305
10306static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10307 SmallVectorImpl<Value *> &Ops) {
10308 auto *SplatZero = Constant::getNullValue(Ty);
10309 Ops.insert(Ops.begin(), SplatZero);
10310}
10311
10312static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10313 SmallVectorImpl<Value *> &Ops) {
10314 auto *SplatUndef = UndefValue::get(Ty);
10315 Ops.insert(Ops.begin(), SplatUndef);
10316}
10317
10318 SmallVector<llvm::Type *, 2>
10319 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10320 llvm::Type *ResultType,
10321 ArrayRef<Value *> Ops) {
10322 if (TypeFlags.isOverloadNone())
10323 return {};
10324
10325 llvm::Type *DefaultType = getSVEType(TypeFlags);
10326
10327 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10328 return {DefaultType, Ops[1]->getType()};
10329
10330 if (TypeFlags.isOverloadWhileRW())
10331 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10332
10333 if (TypeFlags.isOverloadCvt())
10334 return {Ops[0]->getType(), Ops.back()->getType()};
10335
10336 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10337 ResultType->isVectorTy())
10338 return {ResultType, Ops[1]->getType()};
10339
10340 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10341 return {DefaultType};
10342}
10343
10344 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10345 llvm::Type *Ty,
10346 ArrayRef<Value *> Ops) {
10347 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10348 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10349
10350 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10351 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10352 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10353
10354 if (!SingleVecTy)
10355 return nullptr;
10356
10357 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10358 I * SingleVecTy->getMinNumElements());
10359
10360 if (TypeFlags.isTupleSet())
10361 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10362 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10363}
10364
10365 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10366 llvm::Type *Ty,
10367 ArrayRef<Value *> Ops) {
10368 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10369
10370 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10371
10372 if (!SrcTy)
10373 return nullptr;
10374
10375 unsigned MinElts = SrcTy->getMinNumElements();
10376 Value *Call = llvm::PoisonValue::get(Ty);
10377 for (unsigned I = 0; I < Ops.size(); I++) {
10378 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10379 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10380 }
10381
10382 return Call;
10383}
10384
10385 Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10386 // Multi-vector results should be combined into a single (wide) result
10387 // vector.
10388 auto *StructTy = dyn_cast<StructType>(Call->getType());
10389 if (!StructTy)
10390 return Call;
10391
10392 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10393 if (!VTy)
10394 return Call;
10395 unsigned N = StructTy->getNumElements();
10396
10397 // We may need to emit a cast to a svbool_t
10398 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10399 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10400
10401 ScalableVectorType *WideVTy =
10402 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10403 Value *Ret = llvm::PoisonValue::get(WideVTy);
10404 for (unsigned I = 0; I < N; ++I) {
10405 Value *SRet = Builder.CreateExtractValue(Call, I);
10406 assert(SRet->getType() == VTy && "Unexpected type for result value");
10407 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10408
10409 if (IsPredTy)
10410 SRet = EmitSVEPredicateCast(
10411 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10412
10413 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10414 }
10415 Call = Ret;
10416
10417 return Call;
10418}
10419
10420 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10421 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10422 SVETypeFlags TypeFlags) {
10423 // Find out if any arguments are required to be integer constant expressions.
10424 unsigned ICEArguments = 0;
10425 ASTContext::GetBuiltinTypeError Error;
10426 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10427 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10428
10429 // Tuple set/get only requires one insert/extract vector, which is
10430 // created by EmitSVETupleSetOrGet.
10431 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10432
10433 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10434 bool IsICE = ICEArguments & (1 << i);
10435 Value *Arg = EmitScalarExpr(E->getArg(i));
10436
10437 if (IsICE) {
10438 // If this is required to be a constant, constant fold it so that we know
10439 // that the generated intrinsic gets a ConstantInt.
10440 std::optional<llvm::APSInt> Result =
10441 E->getArg(i)->getIntegerConstantExpr(getContext());
10442 assert(Result && "Expected argument to be a constant");
10443
10444 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10445 // truncate because the immediate has been range checked and no valid
10446 // immediate requires more than a handful of bits.
10447 *Result = Result->extOrTrunc(32);
10448 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10449 continue;
10450 }
10451
10452 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10453 Ops.push_back(Arg);
10454 continue;
10455 }
10456
10457 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10458 unsigned MinElts = VTy->getMinNumElements();
10459 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10460 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10461
10462 if (N == 1) {
10463 Ops.push_back(Arg);
10464 continue;
10465 }
10466
10467 for (unsigned I = 0; I < N; ++I) {
10468 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10469 auto *NewVTy =
10470 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10471 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10472 }
10473 }
10474}
10475
10476 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10477 const CallExpr *E) {
10478 llvm::Type *Ty = ConvertType(E->getType());
10479 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10480 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10481 Value *Val = EmitScalarExpr(E->getArg(0));
10482 return EmitSVEReinterpret(Val, Ty);
10483 }
10484
10485 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10486 AArch64SVEIntrinsicsProvenSorted);
10487
10488 llvm::SmallVector<Value *, 4> Ops;
10489 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10490 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10491
10492 if (TypeFlags.isLoad())
10493 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10494 TypeFlags.isZExtReturn());
10495 else if (TypeFlags.isStore())
10496 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10497 else if (TypeFlags.isGatherLoad())
10498 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10499 else if (TypeFlags.isScatterStore())
10500 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10501 else if (TypeFlags.isPrefetch())
10502 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10503 else if (TypeFlags.isGatherPrefetch())
10504 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10505 else if (TypeFlags.isStructLoad())
10506 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10507 else if (TypeFlags.isStructStore())
10508 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10509 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10510 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10511 else if (TypeFlags.isTupleCreate())
10512 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10513 else if (TypeFlags.isUndef())
10514 return UndefValue::get(Ty);
10515 else if (Builtin->LLVMIntrinsic != 0) {
10516 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10517 InsertExplicitZeroOperand(Builder, Ty, Ops);
10518
10519 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10520 InsertExplicitUndefOperand(Builder, Ty, Ops);
10521
10522 // Some ACLE builtins leave out the argument to specify the predicate
10523 // pattern, which is expected to be expanded to an SV_ALL pattern.
10524 if (TypeFlags.isAppendSVALL())
10525 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10526 if (TypeFlags.isInsertOp1SVALL())
10527 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10528
10529 // Predicates must match the main datatype.
10530 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10531 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10532 if (PredTy->getElementType()->isIntegerTy(1))
10533 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10534
10535 // Splat scalar operand to vector (intrinsics with _n infix)
10536 if (TypeFlags.hasSplatOperand()) {
10537 unsigned OpNo = TypeFlags.getSplatOperand();
10538 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10539 }
10540
10541 if (TypeFlags.isReverseCompare())
10542 std::swap(Ops[1], Ops[2]);
10543 else if (TypeFlags.isReverseUSDOT())
10544 std::swap(Ops[1], Ops[2]);
10545 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10546 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10547 std::swap(Ops[1], Ops[2]);
10548 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10549 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10550 std::swap(Ops[1], Ops[3]);
10551
10552 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10553 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10554 llvm::Type *OpndTy = Ops[1]->getType();
10555 auto *SplatZero = Constant::getNullValue(OpndTy);
10556 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10557 }
10558
10559 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10560 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10561 Value *Call = Builder.CreateCall(F, Ops);
10562
10563 // Predicate results must be converted to svbool_t.
10564 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10565 if (PredTy->getScalarType()->isIntegerTy(1))
10566 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10567
10568 return FormSVEBuiltinResult(Call);
10569 }
10570
10571 switch (BuiltinID) {
10572 default:
10573 return nullptr;
10574
10575 case SVE::BI__builtin_sve_svreinterpret_b: {
10576 auto SVCountTy =
10577 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10578 Function *CastFromSVCountF =
10579 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10580 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10581 }
10582 case SVE::BI__builtin_sve_svreinterpret_c: {
10583 auto SVCountTy =
10584 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10585 Function *CastToSVCountF =
10586 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10587 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10588 }
10589
10590 case SVE::BI__builtin_sve_svpsel_lane_b8:
10591 case SVE::BI__builtin_sve_svpsel_lane_b16:
10592 case SVE::BI__builtin_sve_svpsel_lane_b32:
10593 case SVE::BI__builtin_sve_svpsel_lane_b64:
10594 case SVE::BI__builtin_sve_svpsel_lane_c8:
10595 case SVE::BI__builtin_sve_svpsel_lane_c16:
10596 case SVE::BI__builtin_sve_svpsel_lane_c32:
10597 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10598 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10599 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10600 "aarch64.svcount")) &&
10601 "Unexpected TargetExtType");
10602 auto SVCountTy =
10603 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10604 Function *CastFromSVCountF =
10605 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10606 Function *CastToSVCountF =
10607 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10608
10609 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10610 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10611 llvm::Value *Ops0 =
10612 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10613 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10614 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10615 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10616 }
10617 case SVE::BI__builtin_sve_svmov_b_z: {
10618 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10619 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10620 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10621 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10622 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10623 }
10624
10625 case SVE::BI__builtin_sve_svnot_b_z: {
10626 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10627 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10628 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10629 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10630 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10631 }
10632
10633 case SVE::BI__builtin_sve_svmovlb_u16:
10634 case SVE::BI__builtin_sve_svmovlb_u32:
10635 case SVE::BI__builtin_sve_svmovlb_u64:
10636 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10637
10638 case SVE::BI__builtin_sve_svmovlb_s16:
10639 case SVE::BI__builtin_sve_svmovlb_s32:
10640 case SVE::BI__builtin_sve_svmovlb_s64:
10641 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10642
10643 case SVE::BI__builtin_sve_svmovlt_u16:
10644 case SVE::BI__builtin_sve_svmovlt_u32:
10645 case SVE::BI__builtin_sve_svmovlt_u64:
10646 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10647
10648 case SVE::BI__builtin_sve_svmovlt_s16:
10649 case SVE::BI__builtin_sve_svmovlt_s32:
10650 case SVE::BI__builtin_sve_svmovlt_s64:
10651 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10652
10653 case SVE::BI__builtin_sve_svpmullt_u16:
10654 case SVE::BI__builtin_sve_svpmullt_u64:
10655 case SVE::BI__builtin_sve_svpmullt_n_u16:
10656 case SVE::BI__builtin_sve_svpmullt_n_u64:
10657 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10658
10659 case SVE::BI__builtin_sve_svpmullb_u16:
10660 case SVE::BI__builtin_sve_svpmullb_u64:
10661 case SVE::BI__builtin_sve_svpmullb_n_u16:
10662 case SVE::BI__builtin_sve_svpmullb_n_u64:
10663 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10664
10665 case SVE::BI__builtin_sve_svdup_n_b8:
10666 case SVE::BI__builtin_sve_svdup_n_b16:
10667 case SVE::BI__builtin_sve_svdup_n_b32:
10668 case SVE::BI__builtin_sve_svdup_n_b64: {
10669 Value *CmpNE =
10670 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10671 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10672 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10673 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10674 }
10675
10676 case SVE::BI__builtin_sve_svdupq_n_b8:
10677 case SVE::BI__builtin_sve_svdupq_n_b16:
10678 case SVE::BI__builtin_sve_svdupq_n_b32:
10679 case SVE::BI__builtin_sve_svdupq_n_b64:
10680 case SVE::BI__builtin_sve_svdupq_n_u8:
10681 case SVE::BI__builtin_sve_svdupq_n_s8:
10682 case SVE::BI__builtin_sve_svdupq_n_u64:
10683 case SVE::BI__builtin_sve_svdupq_n_f64:
10684 case SVE::BI__builtin_sve_svdupq_n_s64:
10685 case SVE::BI__builtin_sve_svdupq_n_u16:
10686 case SVE::BI__builtin_sve_svdupq_n_f16:
10687 case SVE::BI__builtin_sve_svdupq_n_bf16:
10688 case SVE::BI__builtin_sve_svdupq_n_s16:
10689 case SVE::BI__builtin_sve_svdupq_n_u32:
10690 case SVE::BI__builtin_sve_svdupq_n_f32:
10691 case SVE::BI__builtin_sve_svdupq_n_s32: {
10692 // These builtins are implemented by building a 128-bit fixed vector from the
10693 // scalar operands and replicating it across the SVE vector with dupq_lane.
10694 unsigned NumOpnds = Ops.size();
10695
10696 bool IsBoolTy =
10697 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10698
10699 // For svdupq_n_b* the element type is an integer whose width is 128/numelts,
10700 // so that the compare can use the width that is natural for the expected
10701 // number of predicate lanes.
10702 llvm::Type *EltTy = Ops[0]->getType();
10703 if (IsBoolTy)
10704 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10705
10706 SmallVector<llvm::Value *, 16> VecOps;
10707 for (unsigned I = 0; I < NumOpnds; ++I)
10708 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10709 Value *Vec = BuildVector(VecOps);
10710
10711 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10712 Value *InsertSubVec = Builder.CreateInsertVector(
10713 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10714
10715 Function *F =
10716 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10717 Value *DupQLane =
10718 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10719
10720 if (!IsBoolTy)
10721 return DupQLane;
10722
10723 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10724 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10725
10726 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10727 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10728 : Intrinsic::aarch64_sve_cmpne_wide,
10729 OverloadedTy);
10730 Value *Call = Builder.CreateCall(
10731 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10732 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10733 }
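  // Illustrative sketch (not from the upstream source): svdupq_n_s32(a,b,c,d)
  // builds the fixed vector <4 x i32> {a,b,c,d}, inserts it at element 0 of a
  // <vscale x 4 x i32>, and aarch64_sve_dupq_lane with lane 0 replicates that
  // 128-bit block across the whole vector; the _b* predicate forms finish
  // with the cmpne-against-zero sequence above to produce an svbool_t result.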
10734
10735 case SVE::BI__builtin_sve_svpfalse_b:
10736 return ConstantInt::getFalse(Ty);
10737
10738 case SVE::BI__builtin_sve_svpfalse_c: {
10739 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10740 Function *CastToSVCountF =
10741 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10742 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10743 }
10744
10745 case SVE::BI__builtin_sve_svlen_bf16:
10746 case SVE::BI__builtin_sve_svlen_f16:
10747 case SVE::BI__builtin_sve_svlen_f32:
10748 case SVE::BI__builtin_sve_svlen_f64:
10749 case SVE::BI__builtin_sve_svlen_s8:
10750 case SVE::BI__builtin_sve_svlen_s16:
10751 case SVE::BI__builtin_sve_svlen_s32:
10752 case SVE::BI__builtin_sve_svlen_s64:
10753 case SVE::BI__builtin_sve_svlen_u8:
10754 case SVE::BI__builtin_sve_svlen_u16:
10755 case SVE::BI__builtin_sve_svlen_u32:
10756 case SVE::BI__builtin_sve_svlen_u64: {
10757 SVETypeFlags TF(Builtin->TypeModifier);
10758 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10759 auto *NumEls =
10760 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10761
10762 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10763 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10764 }
10765
10766 case SVE::BI__builtin_sve_svtbl2_u8:
10767 case SVE::BI__builtin_sve_svtbl2_s8:
10768 case SVE::BI__builtin_sve_svtbl2_u16:
10769 case SVE::BI__builtin_sve_svtbl2_s16:
10770 case SVE::BI__builtin_sve_svtbl2_u32:
10771 case SVE::BI__builtin_sve_svtbl2_s32:
10772 case SVE::BI__builtin_sve_svtbl2_u64:
10773 case SVE::BI__builtin_sve_svtbl2_s64:
10774 case SVE::BI__builtin_sve_svtbl2_f16:
10775 case SVE::BI__builtin_sve_svtbl2_bf16:
10776 case SVE::BI__builtin_sve_svtbl2_f32:
10777 case SVE::BI__builtin_sve_svtbl2_f64: {
10778 SVETypeFlags TF(Builtin->TypeModifier);
10779 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10780 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10781 return Builder.CreateCall(F, Ops);
10782 }
10783
10784 case SVE::BI__builtin_sve_svset_neonq_s8:
10785 case SVE::BI__builtin_sve_svset_neonq_s16:
10786 case SVE::BI__builtin_sve_svset_neonq_s32:
10787 case SVE::BI__builtin_sve_svset_neonq_s64:
10788 case SVE::BI__builtin_sve_svset_neonq_u8:
10789 case SVE::BI__builtin_sve_svset_neonq_u16:
10790 case SVE::BI__builtin_sve_svset_neonq_u32:
10791 case SVE::BI__builtin_sve_svset_neonq_u64:
10792 case SVE::BI__builtin_sve_svset_neonq_f16:
10793 case SVE::BI__builtin_sve_svset_neonq_f32:
10794 case SVE::BI__builtin_sve_svset_neonq_f64:
10795 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10796 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10797 }
10798
10799 case SVE::BI__builtin_sve_svget_neonq_s8:
10800 case SVE::BI__builtin_sve_svget_neonq_s16:
10801 case SVE::BI__builtin_sve_svget_neonq_s32:
10802 case SVE::BI__builtin_sve_svget_neonq_s64:
10803 case SVE::BI__builtin_sve_svget_neonq_u8:
10804 case SVE::BI__builtin_sve_svget_neonq_u16:
10805 case SVE::BI__builtin_sve_svget_neonq_u32:
10806 case SVE::BI__builtin_sve_svget_neonq_u64:
10807 case SVE::BI__builtin_sve_svget_neonq_f16:
10808 case SVE::BI__builtin_sve_svget_neonq_f32:
10809 case SVE::BI__builtin_sve_svget_neonq_f64:
10810 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10811 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10812 }
10813
10814 case SVE::BI__builtin_sve_svdup_neonq_s8:
10815 case SVE::BI__builtin_sve_svdup_neonq_s16:
10816 case SVE::BI__builtin_sve_svdup_neonq_s32:
10817 case SVE::BI__builtin_sve_svdup_neonq_s64:
10818 case SVE::BI__builtin_sve_svdup_neonq_u8:
10819 case SVE::BI__builtin_sve_svdup_neonq_u16:
10820 case SVE::BI__builtin_sve_svdup_neonq_u32:
10821 case SVE::BI__builtin_sve_svdup_neonq_u64:
10822 case SVE::BI__builtin_sve_svdup_neonq_f16:
10823 case SVE::BI__builtin_sve_svdup_neonq_f32:
10824 case SVE::BI__builtin_sve_svdup_neonq_f64:
10825 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10826 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10827 Builder.getInt64(0));
10828 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10829 {Insert, Builder.getInt64(0)});
10830 }
10831 }
10832
10833 // Should not happen.
10834 return nullptr;
10835}
10836
10837static void swapCommutativeSMEOperands(unsigned BuiltinID,
10838 SmallVectorImpl<Value *> &Ops) {
10839 unsigned MultiVec;
10840 switch (BuiltinID) {
10841 default:
10842 return;
10843 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10844 MultiVec = 1;
10845 break;
10846 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10847 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10848 MultiVec = 2;
10849 break;
10850 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10851 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10852 MultiVec = 4;
10853 break;
10854 }
10855
10856 if (MultiVec > 0)
10857 for (unsigned I = 0; I < MultiVec; ++I)
10858 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10859}
10860
10861 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10862 const CallExpr *E) {
10863 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10864 AArch64SMEIntrinsicsProvenSorted);
10865
10866 llvm::SmallVector<Value *, 4> Ops;
10867 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10868 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10869
10870 if (TypeFlags.isLoad() || TypeFlags.isStore())
10871 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10872 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10873 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10874 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10875 BuiltinID == SME::BI__builtin_sme_svzero_za)
10876 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10877 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10878 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10879 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10880 BuiltinID == SME::BI__builtin_sme_svstr_za)
10881 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10882
10883 // Handle builtins which require their multi-vector operands to be swapped
10884 swapCommutativeSMEOperands(BuiltinID, Ops);
10885
10886 // Should not happen!
10887 if (Builtin->LLVMIntrinsic == 0)
10888 return nullptr;
10889
10890 // Predicates must match the main datatype.
10891 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10892 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10893 if (PredTy->getElementType()->isIntegerTy(1))
10894 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10895
10896 Function *F =
10897 TypeFlags.isOverloadNone()
10898 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10899 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10900 Value *Call = Builder.CreateCall(F, Ops);
10901
10902 return FormSVEBuiltinResult(Call);
10903}
10904
10905 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10906 const CallExpr *E,
10907 llvm::Triple::ArchType Arch) {
10908 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10909 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10910 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10911
10912 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10913 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10914 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10915
10916 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10917 return EmitAArch64CpuSupports(E);
10918
10919 unsigned HintID = static_cast<unsigned>(-1);
10920 switch (BuiltinID) {
10921 default: break;
10922 case clang::AArch64::BI__builtin_arm_nop:
10923 HintID = 0;
10924 break;
10925 case clang::AArch64::BI__builtin_arm_yield:
10926 case clang::AArch64::BI__yield:
10927 HintID = 1;
10928 break;
10929 case clang::AArch64::BI__builtin_arm_wfe:
10930 case clang::AArch64::BI__wfe:
10931 HintID = 2;
10932 break;
10933 case clang::AArch64::BI__builtin_arm_wfi:
10934 case clang::AArch64::BI__wfi:
10935 HintID = 3;
10936 break;
10937 case clang::AArch64::BI__builtin_arm_sev:
10938 case clang::AArch64::BI__sev:
10939 HintID = 4;
10940 break;
10941 case clang::AArch64::BI__builtin_arm_sevl:
10942 case clang::AArch64::BI__sevl:
10943 HintID = 5;
10944 break;
10945 }
10946
10947 if (HintID != static_cast<unsigned>(-1)) {
10948 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10949 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10950 }
10951
10952 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10953 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10954 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10955 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10956 }
10957
10958 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10959 // Create call to __arm_sme_state and store the results to the two pointers.
10960 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10961 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10962 false),
10963 "__arm_sme_state"));
10964 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10965 "aarch64_pstate_sm_compatible");
10966 CI->setAttributes(Attrs);
10967 CI->setCallingConv(
10968 llvm::CallingConv::
10969 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10970 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10971 EmitPointerWithAlignment(E->getArg(0)));
10972 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10973 EmitPointerWithAlignment(E->getArg(1)));
10974 }
10975
10976 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10977 assert((getContext().getTypeSize(E->getType()) == 32) &&
10978 "rbit of unusual size!");
10979 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10980 return Builder.CreateCall(
10981 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10982 }
10983 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10984 assert((getContext().getTypeSize(E->getType()) == 64) &&
10985 "rbit of unusual size!");
10986 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10987 return Builder.CreateCall(
10988 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10989 }
10990
10991 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10992 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10993 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10994 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10995 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10996 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10997 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10998 return Res;
10999 }
11000
11001 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11002 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11003 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11004 "cls");
11005 }
11006 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11007 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11008 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11009 "cls");
11010 }
11011
11012 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11013 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11014 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11015 llvm::Type *Ty = Arg->getType();
11016 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11017 Arg, "frint32z");
11018 }
11019
11020 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11021 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11022 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11023 llvm::Type *Ty = Arg->getType();
11024 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11025 Arg, "frint64z");
11026 }
11027
11028 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11029 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11030 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11031 llvm::Type *Ty = Arg->getType();
11032 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11033 Arg, "frint32x");
11034 }
11035
11036 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11037 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11038 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11039 llvm::Type *Ty = Arg->getType();
11040 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11041 Arg, "frint64x");
11042 }
11043
11044 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11045 assert((getContext().getTypeSize(E->getType()) == 32) &&
11046 "__jcvt of unusual size!");
11047 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11048 return Builder.CreateCall(
11049 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11050 }
11051
11052 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11053 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11054 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11055 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11056 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11057 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11058
11059 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11060 // Load from the address via an LLVM intrinsic, receiving a
11061 // tuple of 8 i64 words, and store each one to ValPtr.
11062 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11063 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11064 llvm::Value *ToRet;
11065 for (size_t i = 0; i < 8; i++) {
11066 llvm::Value *ValOffsetPtr =
11067 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11068 Address Addr =
11069 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11070 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11071 }
11072 return ToRet;
11073 } else {
11074 // Load 8 i64 words from ValPtr, and store them to the address
11075 // via an LLVM intrinsic.
11076 llvm::SmallVector<llvm::Value *, 9> Args;
11077 Args.push_back(MemAddr);
11078 for (size_t i = 0; i < 8; i++) {
11079 llvm::Value *ValOffsetPtr =
11080 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11081 Address Addr =
11082 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11083 Args.push_back(Builder.CreateLoad(Addr));
11084 }
11085
11086 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11087 ? Intrinsic::aarch64_st64b
11088 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11089 ? Intrinsic::aarch64_st64bv
11090 : Intrinsic::aarch64_st64bv0);
11091 Function *F = CGM.getIntrinsic(Intr);
11092 return Builder.CreateCall(F, Args);
11093 }
11094 }
11095
11096 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11097 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11098
11099 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11100 ? Intrinsic::aarch64_rndr
11101 : Intrinsic::aarch64_rndrrs);
11102 Function *F = CGM.getIntrinsic(Intr);
11103 llvm::Value *Val = Builder.CreateCall(F);
11104 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11105 Value *Status = Builder.CreateExtractValue(Val, 1);
11106
11107 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11108 Builder.CreateStore(RandomValue, MemAddress);
11109 Status = Builder.CreateZExt(Status, Int32Ty);
11110 return Status;
11111 }
11112
11113 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11114 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11115 const FunctionDecl *FD = E->getDirectCallee();
11116 Value *Ops[2];
11117 for (unsigned i = 0; i < 2; i++)
11118 Ops[i] = EmitScalarExpr(E->getArg(i));
11119 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11120 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11121 StringRef Name = FD->getName();
11122 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11123 }
11124
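// For the 128-bit forms of __builtin_arm_ldrex/ldaex, the ldxp/ldaxp intrinsic
// returns the two 64-bit halves as a struct; they are zero-extended to i128,
// shifted and OR'd back together, then bitcast to the builtin's result type.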
11125 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11126 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11127 getContext().getTypeSize(E->getType()) == 128) {
11128 Function *F =
11129 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11130 ? Intrinsic::aarch64_ldaxp
11131 : Intrinsic::aarch64_ldxp);
11132
11133 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11134 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11135
11136 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11137 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11138 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11139 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11140 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11141
11142 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11143 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11144 Val = Builder.CreateOr(Val, Val1);
11145 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11146 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11147 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11148 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11149
11150 QualType Ty = E->getType();
11151 llvm::Type *RealResTy = ConvertType(Ty);
11152 llvm::Type *IntTy =
11153 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11154
11155 Function *F =
11156 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11157 ? Intrinsic::aarch64_ldaxr
11158 : Intrinsic::aarch64_ldxr,
11159 UnqualPtrTy);
11160 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11161 Val->addParamAttr(
11162 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11163
11164 if (RealResTy->isPointerTy())
11165 return Builder.CreateIntToPtr(Val, RealResTy);
11166
11167 llvm::Type *IntResTy = llvm::IntegerType::get(
11168 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11169 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11170 RealResTy);
11171 }
11172
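// For the 128-bit forms of __builtin_arm_strex/stlex, the value operand is
// spilled to a temporary, reloaded as a pair of i64s, and passed to stxp/stlxp
// along with the target address; the call yields the store-exclusive status.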
11173 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11174 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11175 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11176 Function *F =
11177 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11178 ? Intrinsic::aarch64_stlxp
11179 : Intrinsic::aarch64_stxp);
11180 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11181
11182 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11183 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11184
11185 Tmp = Tmp.withElementType(STy);
11186 llvm::Value *Val = Builder.CreateLoad(Tmp);
11187
11188 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11189 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11190 Value *StPtr = EmitScalarExpr(E->getArg(1));
11191 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11192 }
11193
11194 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11195 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11196 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11197 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11198
11199 QualType Ty = E->getArg(0)->getType();
11200 llvm::Type *StoreTy =
11201 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11202
11203 if (StoreVal->getType()->isPointerTy())
11204 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11205 else {
11206 llvm::Type *IntTy = llvm::IntegerType::get(
11207 getLLVMContext(),
11208 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11209 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11210 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11211 }
11212
11213 Function *F =
11214 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11215 ? Intrinsic::aarch64_stlxr
11216 : Intrinsic::aarch64_stxr,
11217 StoreAddr->getType());
11218 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11219 CI->addParamAttr(
11220 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11221 return CI;
11222 }
11223
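// __getReg(n) lowers to llvm.read_register on the named GPR: register number
// 31 is spelled "sp" and every other value "x<n>", passed to the intrinsic as
// metadata.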
11224 if (BuiltinID == clang::AArch64::BI__getReg) {
11225 Expr::EvalResult Result;
11226 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11227 llvm_unreachable("Sema will ensure that the parameter is constant");
11228
11229 llvm::APSInt Value = Result.Val.getInt();
11230 LLVMContext &Context = CGM.getLLVMContext();
11231 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11232
11233 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11234 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11235 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11236
11237 llvm::Function *F =
11238 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11239 return Builder.CreateCall(F, Metadata);
11240 }
11241
11242 if (BuiltinID == clang::AArch64::BI__break) {
11243 Expr::EvalResult Result;
11244 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11245 llvm_unreachable("Sema will ensure that the parameter is constant");
11246
11247 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11248 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11249 }
11250
11251 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11252 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11253 return Builder.CreateCall(F);
11254 }
11255
11256 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11257 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11258 llvm::SyncScope::SingleThread);
11259
11260 // CRC32
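// Each __builtin_arm_crc32* maps directly onto an aarch64.crc32* intrinsic;
// the 8- and 16-bit variants zero-extend their data operand below to match the
// intrinsic's i32 parameter type.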
11261 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11262 switch (BuiltinID) {
11263 case clang::AArch64::BI__builtin_arm_crc32b:
11264 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11265 case clang::AArch64::BI__builtin_arm_crc32cb:
11266 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11267 case clang::AArch64::BI__builtin_arm_crc32h:
11268 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11269 case clang::AArch64::BI__builtin_arm_crc32ch:
11270 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11271 case clang::AArch64::BI__builtin_arm_crc32w:
11272 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11273 case clang::AArch64::BI__builtin_arm_crc32cw:
11274 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11275 case clang::AArch64::BI__builtin_arm_crc32d:
11276 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11277 case clang::AArch64::BI__builtin_arm_crc32cd:
11278 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11279 }
11280
11281 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11282 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11283 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11284 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11285
11286 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11287 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11288
11289 return Builder.CreateCall(F, {Arg0, Arg1});
11290 }
11291
11292 // Memory Operations (MOPS)
11293 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11294 Value *Dst = EmitScalarExpr(E->getArg(0));
11295 Value *Val = EmitScalarExpr(E->getArg(1));
11296 Value *Size = EmitScalarExpr(E->getArg(2));
11297 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11298 Val = Builder.CreateTrunc(Val, Int8Ty);
11299 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11300 return Builder.CreateCall(
11301 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11302 }
11303
11304 // Memory Tagging Extensions (MTE) Intrinsics
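// All of the MTE builtins below work on i8* values: pointer operands are cast
// to i8*, integer masks/offsets are zero-extended to i64, and tagged-pointer
// results are cast back to the builtin's declared pointer type.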
11305 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11306 switch (BuiltinID) {
11307 case clang::AArch64::BI__builtin_arm_irg:
11308 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11309 case clang::AArch64::BI__builtin_arm_addg:
11310 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11311 case clang::AArch64::BI__builtin_arm_gmi:
11312 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11313 case clang::AArch64::BI__builtin_arm_ldg:
11314 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11315 case clang::AArch64::BI__builtin_arm_stg:
11316 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11317 case clang::AArch64::BI__builtin_arm_subp:
11318 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11319 }
11320
11321 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11322 llvm::Type *T = ConvertType(E->getType());
11323
11324 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11325 Value *Pointer = EmitScalarExpr(E->getArg(0));
11326 Value *Mask = EmitScalarExpr(E->getArg(1));
11327
11328 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11329 Mask = Builder.CreateZExt(Mask, Int64Ty);
11330 Value *RV = Builder.CreateCall(
11331 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11332 return Builder.CreatePointerCast(RV, T);
11333 }
11334 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11335 Value *Pointer = EmitScalarExpr(E->getArg(0));
11336 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11337
11338 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11339 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11340 Value *RV = Builder.CreateCall(
11341 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11342 return Builder.CreatePointerCast(RV, T);
11343 }
11344 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11345 Value *Pointer = EmitScalarExpr(E->getArg(0));
11346 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11347
11348 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11349 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11350 return Builder.CreateCall(
11351 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11352 }
11353 // Although it is possible to supply a different return
11354 // address (first arg) to this intrinsic, for now we set
11355 // return address same as input address.
11356 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11357 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11358 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11359 Value *RV = Builder.CreateCall(
11360 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11361 return Builder.CreatePointerCast(RV, T);
11362 }
11363 // Although it is possible to supply a different tag (to set)
11364 // to this intrinsic (as first arg), for now we supply
11365 // the tag that is in input address arg (common use case).
11366 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11367 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11368 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11369 return Builder.CreateCall(
11370 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11371 }
11372 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11373 Value *PointerA = EmitScalarExpr(E->getArg(0));
11374 Value *PointerB = EmitScalarExpr(E->getArg(1));
11375 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11376 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11377 return Builder.CreateCall(
11378 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11379 }
11380 }
11381
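// __builtin_arm_rsr*/__builtin_arm_wsr*: select the value and register widths
// first (i32, i64, i128, or pointer-sized for the *p forms), then defer to
// EmitSpecialRegisterBuiltin to emit the read_register/write_register call
// with the appropriate access kind.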
11382 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11383 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11384 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11385 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11386 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11387 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11388 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11389 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11390
11391 SpecialRegisterAccessKind AccessKind = Write;
11392 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11393 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11394 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11395 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11396 AccessKind = VolatileRead;
11397
11398 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11399 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11400
11401 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11402 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11403
11404 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11405 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11406
11407 llvm::Type *ValueType;
11408 llvm::Type *RegisterType = Int64Ty;
11409 if (Is32Bit) {
11410 ValueType = Int32Ty;
11411 } else if (Is128Bit) {
11412 llvm::Type *Int128Ty =
11413 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11414 ValueType = Int128Ty;
11415 RegisterType = Int128Ty;
11416 } else if (IsPointerBuiltin) {
11417 ValueType = VoidPtrTy;
11418 } else {
11419 ValueType = Int64Ty;
11420 };
11421
11422 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11423 AccessKind);
11424 }
11425
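// _ReadStatusReg/_WriteStatusReg take a packed system-register immediate.
// Judging from the shifts below, it encodes op0 (one bit, biased onto 0b10),
// op1 (3 bits), CRn (4 bits), CRm (4 bits) and op2 (3 bits), which is rendered
// as the "op0:op1:CRn:CRm:op2" string form expected by the register-name
// metadata.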
11426 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11427 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11428 LLVMContext &Context = CGM.getLLVMContext();
11429
11430 unsigned SysReg =
11431 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11432
11433 std::string SysRegStr;
11434 llvm::raw_string_ostream(SysRegStr) <<
11435 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11436 ((SysReg >> 11) & 7) << ":" <<
11437 ((SysReg >> 7) & 15) << ":" <<
11438 ((SysReg >> 3) & 15) << ":" <<
11439 ( SysReg & 7);
11440
11441 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11442 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11443 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11444
11445 llvm::Type *RegisterType = Int64Ty;
11446 llvm::Type *Types[] = { RegisterType };
11447
11448 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11449 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11450
11451 return Builder.CreateCall(F, Metadata);
11452 }
11453
11454 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11455 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11456
11457 return Builder.CreateCall(F, { Metadata, ArgValue });
11458 }
11459
11460 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11461 llvm::Function *F =
11462 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11463 return Builder.CreateCall(F);
11464 }
11465
11466 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11467 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11468 return Builder.CreateCall(F);
11469 }
11470
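// __mulh/__umulh return the high 64 bits of the full 64x64-bit product: both
// operands are widened to i128, multiplied, shifted right by 64, and the high
// half truncated back to the declared return type.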
11471 if (BuiltinID == clang::AArch64::BI__mulh ||
11472 BuiltinID == clang::AArch64::BI__umulh) {
11473 llvm::Type *ResType = ConvertType(E->getType());
11474 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11475
11476 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11477 Value *LHS =
11478 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11479 Value *RHS =
11480 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11481
11482 Value *MulResult, *HigherBits;
11483 if (IsSigned) {
11484 MulResult = Builder.CreateNSWMul(LHS, RHS);
11485 HigherBits = Builder.CreateAShr(MulResult, 64);
11486 } else {
11487 MulResult = Builder.CreateNUWMul(LHS, RHS);
11488 HigherBits = Builder.CreateLShr(MulResult, 64);
11489 }
11490 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11491
11492 return HigherBits;
11493 }
11494
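// The __writex18*/__readx18* builtins access memory relative to the x18
// platform register: x18 is read via llvm.read_register, reinterpreted as an
// i8*, and indexed with the byte offset; the access is emitted with byte
// alignment since the offset implies no stronger guarantee.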
11495 if (BuiltinID == AArch64::BI__writex18byte ||
11496 BuiltinID == AArch64::BI__writex18word ||
11497 BuiltinID == AArch64::BI__writex18dword ||
11498 BuiltinID == AArch64::BI__writex18qword) {
11499 // Read x18 as i8*
11500 LLVMContext &Context = CGM.getLLVMContext();
11501 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11502 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11503 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11504 llvm::Function *F =
11505 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11506 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11507 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11508
11509 // Store val at x18 + offset
11510 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11511 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11512 Value *Val = EmitScalarExpr(E->getArg(1));
11513 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11514 return Store;
11515 }
11516
11517 if (BuiltinID == AArch64::BI__readx18byte ||
11518 BuiltinID == AArch64::BI__readx18word ||
11519 BuiltinID == AArch64::BI__readx18dword ||
11520 BuiltinID == AArch64::BI__readx18qword) {
11521 llvm::Type *IntTy = ConvertType(E->getType());
11522
11523 // Read x18 as i8*
11524 LLVMContext &Context = CGM.getLLVMContext();
11525 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11526 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11527 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11528 llvm::Function *F =
11529 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11530 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11531 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11532
11533 // Load x18 + offset
11534 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11535 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11536 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11537 return Load;
11538 }
11539
11540 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11541 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11542 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11543 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11544 Value *Arg = EmitScalarExpr(E->getArg(0));
11545 llvm::Type *RetTy = ConvertType(E->getType());
11546 return Builder.CreateBitCast(Arg, RetTy);
11547 }
11548
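// _CountLeadingOnes* is lowered as ctlz of the complemented input. The ctlz
// call passes 'false' for its is-zero-poison flag so a zero (or all-ones)
// input stays well defined, and the 64-bit variants truncate the count to the
// 32-bit return type.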
11549 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11550 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11551 BuiltinID == AArch64::BI_CountLeadingZeros ||
11552 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11553 Value *Arg = EmitScalarExpr(E->getArg(0));
11554 llvm::Type *ArgType = Arg->getType();
11555
11556 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11557 BuiltinID == AArch64::BI_CountLeadingOnes64)
11558 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11559
11560 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11561 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11562
11563 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11564 BuiltinID == AArch64::BI_CountLeadingZeros64)
11565 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11566 return Result;
11567 }
11568
11569 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11570 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11571 Value *Arg = EmitScalarExpr(E->getArg(0));
11572
11573 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11574 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11575 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11576
11577 Value *Result = Builder.CreateCall(F, Arg, "cls");
11578 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11579 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11580 return Result;
11581 }
11582
11583 if (BuiltinID == AArch64::BI_CountOneBits ||
11584 BuiltinID == AArch64::BI_CountOneBits64) {
11585 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11586 llvm::Type *ArgType = ArgValue->getType();
11587 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11588
11589 Value *Result = Builder.CreateCall(F, ArgValue);
11590 if (BuiltinID == AArch64::BI_CountOneBits64)
11591 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11592 return Result;
11593 }
11594
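// __prefetch becomes llvm.prefetch with rw=0 (read), locality=3 (keep in all
// cache levels) and cache type 1 (data), matching the constants constructed
// below.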
11595 if (BuiltinID == AArch64::BI__prefetch) {
11596 Value *Address = EmitScalarExpr(E->getArg(0));
11597 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11598 Value *Locality = ConstantInt::get(Int32Ty, 3);
11599 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11600 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11601 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11602 }
11603
11604 if (BuiltinID == AArch64::BI__hlt) {
11605 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11606 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11607
11608 // Return 0 for convenience, even though MSVC returns some other undefined
11609 // value.
11610 return ConstantInt::get(Builder.getInt32Ty(), 0);
11611 }
11612
11613 // Handle MSVC intrinsics before argument evaluation to prevent double
11614 // evaluation.
11615 if (std::optional<MSVCIntrin> MsvcIntId =
11616 translateAarch64ToMsvcIntrin(BuiltinID))
11617 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11618
11619 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11620 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11621 return P.first == BuiltinID;
11622 });
11623 if (It != end(NEONEquivalentIntrinsicMap))
11624 BuiltinID = It->second;
11625
11626 // Find out if any arguments are required to be integer constant
11627 // expressions.
11628 unsigned ICEArguments = 0;
11629 ASTContext::GetBuiltinTypeError Error;
11630 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11631 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11632
11633 llvm::SmallVector<Value*, 4> Ops;
11634 Address PtrOp0 = Address::invalid();
11635 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11636 if (i == 0) {
11637 switch (BuiltinID) {
11638 case NEON::BI__builtin_neon_vld1_v:
11639 case NEON::BI__builtin_neon_vld1q_v:
11640 case NEON::BI__builtin_neon_vld1_dup_v:
11641 case NEON::BI__builtin_neon_vld1q_dup_v:
11642 case NEON::BI__builtin_neon_vld1_lane_v:
11643 case NEON::BI__builtin_neon_vld1q_lane_v:
11644 case NEON::BI__builtin_neon_vst1_v:
11645 case NEON::BI__builtin_neon_vst1q_v:
11646 case NEON::BI__builtin_neon_vst1_lane_v:
11647 case NEON::BI__builtin_neon_vst1q_lane_v:
11648 case NEON::BI__builtin_neon_vldap1_lane_s64:
11649 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11650 case NEON::BI__builtin_neon_vstl1_lane_s64:
11651 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11652 // Get the alignment for the argument in addition to the value;
11653 // we'll use it later.
11654 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11655 Ops.push_back(PtrOp0.emitRawPointer(*this));
11656 continue;
11657 }
11658 }
11659 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11660 }
11661
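// Scalar (SISD) builtins that share lowering with the generic NEON path are
// looked up in AArch64SISDIntrinsicMap; the last source argument is appended
// here because the argument loop above intentionally stops one short of it.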
11662 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11663 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11664 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11665
11666 if (Builtin) {
11667 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11668 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11669 assert(Result && "SISD intrinsic should have been handled");
11670 return Result;
11671 }
11672
11673 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11674 NeonTypeFlags Type(0);
11675 if (std::optional<llvm::APSInt> Result =
11676 Arg->getIntegerConstantExpr(getContext()))
11677 // Determine the type of this overloaded NEON intrinsic.
11678 Type = NeonTypeFlags(Result->getZExtValue());
11679
11680 bool usgn = Type.isUnsigned();
11681 bool quad = Type.isQuad();
11682
11683 // Handle non-overloaded intrinsics first.
11684 switch (BuiltinID) {
11685 default: break;
11686 case NEON::BI__builtin_neon_vabsh_f16:
11687 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11688 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11689 case NEON::BI__builtin_neon_vaddq_p128: {
11690 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11691 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11692 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11693 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11694 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11695 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11696 return Builder.CreateBitCast(Ops[0], Int128Ty);
11697 }
11698 case NEON::BI__builtin_neon_vldrq_p128: {
11699 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11700 Value *Ptr = EmitScalarExpr(E->getArg(0));
11701 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11702 CharUnits::fromQuantity(16));
11703 }
11704 case NEON::BI__builtin_neon_vstrq_p128: {
11705 Value *Ptr = Ops[0];
11706 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11707 }
11708 case NEON::BI__builtin_neon_vcvts_f32_u32:
11709 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11710 usgn = true;
11711 [[fallthrough]];
11712 case NEON::BI__builtin_neon_vcvts_f32_s32:
11713 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11714 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11715 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11716 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11717 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11718 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11719 if (usgn)
11720 return Builder.CreateUIToFP(Ops[0], FTy);
11721 return Builder.CreateSIToFP(Ops[0], FTy);
11722 }
11723 case NEON::BI__builtin_neon_vcvth_f16_u16:
11724 case NEON::BI__builtin_neon_vcvth_f16_u32:
11725 case NEON::BI__builtin_neon_vcvth_f16_u64:
11726 usgn = true;
11727 [[fallthrough]];
11728 case NEON::BI__builtin_neon_vcvth_f16_s16:
11729 case NEON::BI__builtin_neon_vcvth_f16_s32:
11730 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11731 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11732 llvm::Type *FTy = HalfTy;
11733 llvm::Type *InTy;
11734 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11735 InTy = Int64Ty;
11736 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11737 InTy = Int32Ty;
11738 else
11739 InTy = Int16Ty;
11740 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11741 if (usgn)
11742 return Builder.CreateUIToFP(Ops[0], FTy);
11743 return Builder.CreateSIToFP(Ops[0], FTy);
11744 }
11745 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11746 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11747 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11748 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11749 case NEON::BI__builtin_neon_vcvth_u16_f16:
11750 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11751 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11752 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11753 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11754 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11755 unsigned Int;
11756 llvm::Type* InTy = Int32Ty;
11757 llvm::Type* FTy = HalfTy;
11758 llvm::Type *Tys[2] = {InTy, FTy};
11759 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11760 switch (BuiltinID) {
11761 default: llvm_unreachable("missing builtin ID in switch!");
11762 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11763 Int = Intrinsic::aarch64_neon_fcvtau; break;
11764 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11765 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11766 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11767 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11768 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11769 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11770 case NEON::BI__builtin_neon_vcvth_u16_f16:
11771 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11772 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11773 Int = Intrinsic::aarch64_neon_fcvtas; break;
11774 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11775 Int = Intrinsic::aarch64_neon_fcvtms; break;
11776 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11777 Int = Intrinsic::aarch64_neon_fcvtns; break;
11778 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11779 Int = Intrinsic::aarch64_neon_fcvtps; break;
11780 case NEON::BI__builtin_neon_vcvth_s16_f16:
11781 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11782 }
11783 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11784 return Builder.CreateTrunc(Ops[0], Int16Ty);
11785 }
11786 case NEON::BI__builtin_neon_vcaleh_f16:
11787 case NEON::BI__builtin_neon_vcalth_f16:
11788 case NEON::BI__builtin_neon_vcageh_f16:
11789 case NEON::BI__builtin_neon_vcagth_f16: {
11790 unsigned Int;
11791 llvm::Type* InTy = Int32Ty;
11792 llvm::Type* FTy = HalfTy;
11793 llvm::Type *Tys[2] = {InTy, FTy};
11794 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11795 switch (BuiltinID) {
11796 default: llvm_unreachable("missing builtin ID in switch!");
11797 case NEON::BI__builtin_neon_vcageh_f16:
11798 Int = Intrinsic::aarch64_neon_facge; break;
11799 case NEON::BI__builtin_neon_vcagth_f16:
11800 Int = Intrinsic::aarch64_neon_facgt; break;
11801 case NEON::BI__builtin_neon_vcaleh_f16:
11802 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11803 case NEON::BI__builtin_neon_vcalth_f16:
11804 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11805 }
11806 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11807 return Builder.CreateTrunc(Ops[0], Int16Ty);
11808 }
11809 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11810 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11811 unsigned Int;
11812 llvm::Type* InTy = Int32Ty;
11813 llvm::Type* FTy = HalfTy;
11814 llvm::Type *Tys[2] = {InTy, FTy};
11815 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11816 switch (BuiltinID) {
11817 default: llvm_unreachable("missing builtin ID in switch!");
11818 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11819 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11820 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11821 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11822 }
11823 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11824 return Builder.CreateTrunc(Ops[0], Int16Ty);
11825 }
11826 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11827 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11828 unsigned Int;
11829 llvm::Type* FTy = HalfTy;
11830 llvm::Type* InTy = Int32Ty;
11831 llvm::Type *Tys[2] = {FTy, InTy};
11832 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11833 switch (BuiltinID) {
11834 default: llvm_unreachable("missing builtin ID in switch!");
11835 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11836 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11837 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11838 break;
11839 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11840 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11841 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11842 break;
11843 }
11844 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11845 }
11846 case NEON::BI__builtin_neon_vpaddd_s64: {
11847 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11848 Value *Vec = EmitScalarExpr(E->getArg(0));
11849 // The vector is v2i64, so make sure it's bitcast to that.
11850 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11851 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11852 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11853 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11854 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11855 // Pairwise addition of a v2i64 into a scalar i64.
11856 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11857 }
11858 case NEON::BI__builtin_neon_vpaddd_f64: {
11859 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11860 Value *Vec = EmitScalarExpr(E->getArg(0));
11861 // The vector is v2f64, so make sure it's bitcast to that.
11862 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11863 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11864 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11865 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11866 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11867 // Pairwise addition of a v2f64 into a scalar f64.
11868 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11869 }
11870 case NEON::BI__builtin_neon_vpadds_f32: {
11871 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11872 Value *Vec = EmitScalarExpr(E->getArg(0));
11873 // The vector is v2f32, so make sure it's bitcast to that.
11874 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11875 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11876 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11877 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11878 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11879 // Pairwise addition of a v2f32 into a scalar f32.
11880 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11881 }
11882 case NEON::BI__builtin_neon_vceqzd_s64:
11883 case NEON::BI__builtin_neon_vceqzd_f64:
11884 case NEON::BI__builtin_neon_vceqzs_f32:
11885 case NEON::BI__builtin_neon_vceqzh_f16:
11886 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11887 return EmitAArch64CompareBuiltinExpr(
11888 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11889 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11890 case NEON::BI__builtin_neon_vcgezd_s64:
11891 case NEON::BI__builtin_neon_vcgezd_f64:
11892 case NEON::BI__builtin_neon_vcgezs_f32:
11893 case NEON::BI__builtin_neon_vcgezh_f16:
11894 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11895 return EmitAArch64CompareBuiltinExpr(
11896 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11897 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11898 case NEON::BI__builtin_neon_vclezd_s64:
11899 case NEON::BI__builtin_neon_vclezd_f64:
11900 case NEON::BI__builtin_neon_vclezs_f32:
11901 case NEON::BI__builtin_neon_vclezh_f16:
11902 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11903 return EmitAArch64CompareBuiltinExpr(
11904 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11905 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11906 case NEON::BI__builtin_neon_vcgtzd_s64:
11907 case NEON::BI__builtin_neon_vcgtzd_f64:
11908 case NEON::BI__builtin_neon_vcgtzs_f32:
11909 case NEON::BI__builtin_neon_vcgtzh_f16:
11910 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11911 return EmitAArch64CompareBuiltinExpr(
11912 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11913 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11914 case NEON::BI__builtin_neon_vcltzd_s64:
11915 case NEON::BI__builtin_neon_vcltzd_f64:
11916 case NEON::BI__builtin_neon_vcltzs_f32:
11917 case NEON::BI__builtin_neon_vcltzh_f16:
11918 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11919 return EmitAArch64CompareBuiltinExpr(
11920 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11921 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11922
11923 case NEON::BI__builtin_neon_vceqzd_u64: {
11924 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11925 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11926 Ops[0] =
11927 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11928 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11929 }
11930 case NEON::BI__builtin_neon_vceqd_f64:
11931 case NEON::BI__builtin_neon_vcled_f64:
11932 case NEON::BI__builtin_neon_vcltd_f64:
11933 case NEON::BI__builtin_neon_vcged_f64:
11934 case NEON::BI__builtin_neon_vcgtd_f64: {
11935 llvm::CmpInst::Predicate P;
11936 switch (BuiltinID) {
11937 default: llvm_unreachable("missing builtin ID in switch!");
11938 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11939 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11940 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11941 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11942 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11943 }
11944 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11945 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11946 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11947 if (P == llvm::FCmpInst::FCMP_OEQ)
11948 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11949 else
11950 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11951 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11952 }
11953 case NEON::BI__builtin_neon_vceqs_f32:
11954 case NEON::BI__builtin_neon_vcles_f32:
11955 case NEON::BI__builtin_neon_vclts_f32:
11956 case NEON::BI__builtin_neon_vcges_f32:
11957 case NEON::BI__builtin_neon_vcgts_f32: {
11958 llvm::CmpInst::Predicate P;
11959 switch (BuiltinID) {
11960 default: llvm_unreachable("missing builtin ID in switch!");
11961 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11962 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11963 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11964 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11965 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11966 }
11967 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11968 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11969 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11970 if (P == llvm::FCmpInst::FCMP_OEQ)
11971 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11972 else
11973 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11974 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11975 }
11976 case NEON::BI__builtin_neon_vceqh_f16:
11977 case NEON::BI__builtin_neon_vcleh_f16:
11978 case NEON::BI__builtin_neon_vclth_f16:
11979 case NEON::BI__builtin_neon_vcgeh_f16:
11980 case NEON::BI__builtin_neon_vcgth_f16: {
11981 llvm::CmpInst::Predicate P;
11982 switch (BuiltinID) {
11983 default: llvm_unreachable("missing builtin ID in switch!");
11984 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11985 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11986 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11987 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11988 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11989 }
11990 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11991 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11992 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11993 if (P == llvm::FCmpInst::FCMP_OEQ)
11994 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11995 else
11996 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11997 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11998 }
11999 case NEON::BI__builtin_neon_vceqd_s64:
12000 case NEON::BI__builtin_neon_vceqd_u64:
12001 case NEON::BI__builtin_neon_vcgtd_s64:
12002 case NEON::BI__builtin_neon_vcgtd_u64:
12003 case NEON::BI__builtin_neon_vcltd_s64:
12004 case NEON::BI__builtin_neon_vcltd_u64:
12005 case NEON::BI__builtin_neon_vcged_u64:
12006 case NEON::BI__builtin_neon_vcged_s64:
12007 case NEON::BI__builtin_neon_vcled_u64:
12008 case NEON::BI__builtin_neon_vcled_s64: {
12009 llvm::CmpInst::Predicate P;
12010 switch (BuiltinID) {
12011 default: llvm_unreachable("missing builtin ID in switch!");
12012 case NEON::BI__builtin_neon_vceqd_s64:
12013 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12014 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12015 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12016 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12017 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12018 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12019 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12020 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12021 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12022 }
12023 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12024 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12025 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12026 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12027 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12028 }
12029 case NEON::BI__builtin_neon_vtstd_s64:
12030 case NEON::BI__builtin_neon_vtstd_u64: {
12031 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12032 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12033 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12034 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12035 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12036 llvm::Constant::getNullValue(Int64Ty));
12037 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12038 }
12039 case NEON::BI__builtin_neon_vset_lane_i8:
12040 case NEON::BI__builtin_neon_vset_lane_i16:
12041 case NEON::BI__builtin_neon_vset_lane_i32:
12042 case NEON::BI__builtin_neon_vset_lane_i64:
12043 case NEON::BI__builtin_neon_vset_lane_bf16:
12044 case NEON::BI__builtin_neon_vset_lane_f32:
12045 case NEON::BI__builtin_neon_vsetq_lane_i8:
12046 case NEON::BI__builtin_neon_vsetq_lane_i16:
12047 case NEON::BI__builtin_neon_vsetq_lane_i32:
12048 case NEON::BI__builtin_neon_vsetq_lane_i64:
12049 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12050 case NEON::BI__builtin_neon_vsetq_lane_f32:
12051 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12052 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12053 case NEON::BI__builtin_neon_vset_lane_f64:
12054 // The vector type needs a cast for the v1f64 variant.
12055 Ops[1] =
12056 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12057 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12058 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12059 case NEON::BI__builtin_neon_vsetq_lane_f64:
12060 // The vector type needs a cast for the v2f64 variant.
12061 Ops[1] =
12062 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12063 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12064 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12065
12066 case NEON::BI__builtin_neon_vget_lane_i8:
12067 case NEON::BI__builtin_neon_vdupb_lane_i8:
12068 Ops[0] =
12069 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12070 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12071 "vget_lane");
12072 case NEON::BI__builtin_neon_vgetq_lane_i8:
12073 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12074 Ops[0] =
12075 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12076 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12077 "vgetq_lane");
12078 case NEON::BI__builtin_neon_vget_lane_i16:
12079 case NEON::BI__builtin_neon_vduph_lane_i16:
12080 Ops[0] =
12081 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12082 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12083 "vget_lane");
12084 case NEON::BI__builtin_neon_vgetq_lane_i16:
12085 case NEON::BI__builtin_neon_vduph_laneq_i16:
12086 Ops[0] =
12087 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12088 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12089 "vgetq_lane");
12090 case NEON::BI__builtin_neon_vget_lane_i32:
12091 case NEON::BI__builtin_neon_vdups_lane_i32:
12092 Ops[0] =
12093 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12094 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12095 "vget_lane");
12096 case NEON::BI__builtin_neon_vdups_lane_f32:
12097 Ops[0] =
12098 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12099 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12100 "vdups_lane");
12101 case NEON::BI__builtin_neon_vgetq_lane_i32:
12102 case NEON::BI__builtin_neon_vdups_laneq_i32:
12103 Ops[0] =
12104 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12105 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12106 "vgetq_lane");
12107 case NEON::BI__builtin_neon_vget_lane_i64:
12108 case NEON::BI__builtin_neon_vdupd_lane_i64:
12109 Ops[0] =
12110 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12111 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12112 "vget_lane");
12113 case NEON::BI__builtin_neon_vdupd_lane_f64:
12114 Ops[0] =
12115 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12116 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12117 "vdupd_lane");
12118 case NEON::BI__builtin_neon_vgetq_lane_i64:
12119 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12120 Ops[0] =
12121 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12122 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12123 "vgetq_lane");
12124 case NEON::BI__builtin_neon_vget_lane_f32:
12125 Ops[0] =
12126 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12127 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12128 "vget_lane");
12129 case NEON::BI__builtin_neon_vget_lane_f64:
12130 Ops[0] =
12131 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12132 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12133 "vget_lane");
12134 case NEON::BI__builtin_neon_vgetq_lane_f32:
12135 case NEON::BI__builtin_neon_vdups_laneq_f32:
12136 Ops[0] =
12137 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12138 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12139 "vgetq_lane");
12140 case NEON::BI__builtin_neon_vgetq_lane_f64:
12141 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12142 Ops[0] =
12143 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12144 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12145 "vgetq_lane");
12146 case NEON::BI__builtin_neon_vaddh_f16:
12147 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12148 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12149 case NEON::BI__builtin_neon_vsubh_f16:
12150 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12151 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12152 case NEON::BI__builtin_neon_vmulh_f16:
12153 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12154 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12155 case NEON::BI__builtin_neon_vdivh_f16:
12156 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12157 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12158 case NEON::BI__builtin_neon_vfmah_f16:
12159 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12160 return emitCallMaybeConstrainedFPBuiltin(
12161 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12162 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12163 case NEON::BI__builtin_neon_vfmsh_f16: {
12164 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12165
12166 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12167 return emitCallMaybeConstrainedFPBuiltin(
12168 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12169 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12170 }
12171 case NEON::BI__builtin_neon_vaddd_s64:
12172 case NEON::BI__builtin_neon_vaddd_u64:
12173 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12174 case NEON::BI__builtin_neon_vsubd_s64:
12175 case NEON::BI__builtin_neon_vsubd_u64:
12176 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12177 case NEON::BI__builtin_neon_vqdmlalh_s16:
12178 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12179 SmallVector<Value *, 2> ProductOps;
12180 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12181 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12182 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12183 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12184 ProductOps, "vqdmlXl");
12185 Constant *CI = ConstantInt::get(SizeTy, 0);
12186 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12187
12188 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12189 ? Intrinsic::aarch64_neon_sqadd
12190 : Intrinsic::aarch64_neon_sqsub;
12191 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12192 }
12193 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12194 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12195 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12196 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12197 Ops, "vqshlu_n");
12198 }
12199 case NEON::BI__builtin_neon_vqshld_n_u64:
12200 case NEON::BI__builtin_neon_vqshld_n_s64: {
12201 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12202 ? Intrinsic::aarch64_neon_uqshl
12203 : Intrinsic::aarch64_neon_sqshl;
12204 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12205 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12206 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12207 }
12208 case NEON::BI__builtin_neon_vrshrd_n_u64:
12209 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12210 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12211 ? Intrinsic::aarch64_neon_urshl
12212 : Intrinsic::aarch64_neon_srshl;
12213 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12214 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12215 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12216 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12217 }
12218 case NEON::BI__builtin_neon_vrsrad_n_u64:
12219 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12220 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12221 ? Intrinsic::aarch64_neon_urshl
12222 : Intrinsic::aarch64_neon_srshl;
12223 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12224 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12225 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12226 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12227 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12228 }
12229 case NEON::BI__builtin_neon_vshld_n_s64:
12230 case NEON::BI__builtin_neon_vshld_n_u64: {
12231 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12232 return Builder.CreateShl(
12233 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12234 }
12235 case NEON::BI__builtin_neon_vshrd_n_s64: {
12236 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12237 return Builder.CreateAShr(
12238 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12239 Amt->getZExtValue())),
12240 "shrd_n");
12241 }
12242 case NEON::BI__builtin_neon_vshrd_n_u64: {
12243 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12244 uint64_t ShiftAmt = Amt->getZExtValue();
12245 // Right-shifting an unsigned value by its size yields 0.
12246 if (ShiftAmt == 64)
12247 return ConstantInt::get(Int64Ty, 0);
12248 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12249 "shrd_n");
12250 }
12251 case NEON::BI__builtin_neon_vsrad_n_s64: {
12252 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12253 Ops[1] = Builder.CreateAShr(
12254 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12255 Amt->getZExtValue())),
12256 "shrd_n");
12257 return Builder.CreateAdd(Ops[0], Ops[1]);
12258 }
12259 case NEON::BI__builtin_neon_vsrad_n_u64: {
12260 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12261 uint64_t ShiftAmt = Amt->getZExtValue();
12262 // Right-shifting an unsigned value by its size yields 0.
12263 // As Op + 0 = Op, return Ops[0] directly.
12264 if (ShiftAmt == 64)
12265 return Ops[0];
12266 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12267 "shrd_n");
12268 return Builder.CreateAdd(Ops[0], Ops[1]);
12269 }
12270 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12271 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12272 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12273 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12274 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12275 "lane");
12276 SmallVector<Value *, 2> ProductOps;
12277 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12278 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12279 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12280 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12281 ProductOps, "vqdmlXl");
12282 Constant *CI = ConstantInt::get(SizeTy, 0);
12283 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12284 Ops.pop_back();
12285
12286 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12287 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12288 ? Intrinsic::aarch64_neon_sqadd
12289 : Intrinsic::aarch64_neon_sqsub;
12290 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12291 }
12292 case NEON::BI__builtin_neon_vqdmlals_s32:
12293 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12294 SmallVector<Value *, 2> ProductOps;
12295 ProductOps.push_back(Ops[1]);
12296 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12297 Ops[1] =
12298 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12299 ProductOps, "vqdmlXl");
12300
12301 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12302 ? Intrinsic::aarch64_neon_sqadd
12303 : Intrinsic::aarch64_neon_sqsub;
12304 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12305 }
12306 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12307 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12308 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12309 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12310 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12311 "lane");
12312 SmallVector<Value *, 2> ProductOps;
12313 ProductOps.push_back(Ops[1]);
12314 ProductOps.push_back(Ops[2]);
12315 Ops[1] =
12316 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12317 ProductOps, "vqdmlXl");
12318 Ops.pop_back();
12319
12320 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12321 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12322 ? Intrinsic::aarch64_neon_sqadd
12323 : Intrinsic::aarch64_neon_sqsub;
12324 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12325 }
12326 case NEON::BI__builtin_neon_vget_lane_bf16:
12327 case NEON::BI__builtin_neon_vduph_lane_bf16:
12328 case NEON::BI__builtin_neon_vduph_lane_f16: {
12329 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12330 "vget_lane");
12331 }
12332 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12333 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12334 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12335 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12336 "vgetq_lane");
12337 }
12338
12339 case clang::AArch64::BI_InterlockedAdd:
12340 case clang::AArch64::BI_InterlockedAdd64: {
12341 Address DestAddr = CheckAtomicAlignment(*this, E);
12342 Value *Val = EmitScalarExpr(E->getArg(1));
12343 AtomicRMWInst *RMWI =
12344 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12345 llvm::AtomicOrdering::SequentiallyConsistent);
12346 return Builder.CreateAdd(RMWI, Val);
12347 }
12348 }
12349
12350 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12351 llvm::Type *Ty = VTy;
12352 if (!Ty)
12353 return nullptr;
12354
12355 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12356 // defer to common code if it's been added to our special map.
12357 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12358 AArch64SIMDIntrinsicsProvenSorted);
12359
12360 if (Builtin)
12361 return EmitCommonNeonBuiltinExpr(
12362 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12363 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12364 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12365
12366 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12367 return V;
12368
12369 unsigned Int;
12370 switch (BuiltinID) {
12371 default: return nullptr;
12372 case NEON::BI__builtin_neon_vbsl_v:
12373 case NEON::BI__builtin_neon_vbslq_v: {
12374 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12375 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12376 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12377 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12378
12379 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12380 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12381 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12382 return Builder.CreateBitCast(Ops[0], Ty);
12383 }
12384 case NEON::BI__builtin_neon_vfma_lane_v:
12385 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12386 // The ARM builtins (and instructions) have the addend as the first
12387 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12388 Value *Addend = Ops[0];
12389 Value *Multiplicand = Ops[1];
12390 Value *LaneSource = Ops[2];
12391 Ops[0] = Multiplicand;
12392 Ops[1] = LaneSource;
12393 Ops[2] = Addend;
12394
12395 // Now adjust things to handle the lane access.
12396 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12397 ? llvm::FixedVectorType::get(VTy->getElementType(),
12398 VTy->getNumElements() / 2)
12399 : VTy;
12400 llvm::Constant *cst = cast<Constant>(Ops[3]);
12401 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12402 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12403 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12404
12405 Ops.pop_back();
12406 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12407 : Intrinsic::fma;
12408 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12409 }
12410 case NEON::BI__builtin_neon_vfma_laneq_v: {
12411 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12412 // v1f64 fma should be mapped to Neon scalar f64 fma
12413 if (VTy && VTy->getElementType() == DoubleTy) {
12414 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12415 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12416       llvm::FixedVectorType *VTy =
12417           GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12418 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12419 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12420       Value *Result;
12421       Result = emitCallMaybeConstrainedFPBuiltin(
12422 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12423 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12424 return Builder.CreateBitCast(Result, Ty);
12425 }
12426 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12427 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12428
12429 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12430 VTy->getNumElements() * 2);
12431 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12432 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12433 cast<ConstantInt>(Ops[3]));
12434 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12435
12436     return emitCallMaybeConstrainedFPBuiltin(
12437 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12438 {Ops[2], Ops[1], Ops[0]});
12439 }
12440 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12441 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12442 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12443
12444 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12445     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12446     return emitCallMaybeConstrainedFPBuiltin(
12447 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12448 {Ops[2], Ops[1], Ops[0]});
12449 }
12450 case NEON::BI__builtin_neon_vfmah_lane_f16:
12451 case NEON::BI__builtin_neon_vfmas_lane_f32:
12452 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12453 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12454 case NEON::BI__builtin_neon_vfmad_lane_f64:
12455 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12456 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12457 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12458     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12459     return emitCallMaybeConstrainedFPBuiltin(
12460 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12461 {Ops[1], Ops[2], Ops[0]});
12462 }
12463 case NEON::BI__builtin_neon_vmull_v:
12464 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12465 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12466 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12467 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12468 case NEON::BI__builtin_neon_vmax_v:
12469 case NEON::BI__builtin_neon_vmaxq_v:
12470 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12471 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12472 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12473 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12474 case NEON::BI__builtin_neon_vmaxh_f16: {
12475 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12476 Int = Intrinsic::aarch64_neon_fmax;
12477 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12478 }
12479 case NEON::BI__builtin_neon_vmin_v:
12480 case NEON::BI__builtin_neon_vminq_v:
12481 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12482 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12483 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12484 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12485 case NEON::BI__builtin_neon_vminh_f16: {
12486 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12487 Int = Intrinsic::aarch64_neon_fmin;
12488 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12489 }
12490 case NEON::BI__builtin_neon_vabd_v:
12491 case NEON::BI__builtin_neon_vabdq_v:
12492 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12493 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12494 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12495 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
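  // Note: the pairwise add-and-accumulate builtins below have no direct
  // accumulating intrinsic; they are emitted as a widening pairwise add
  // (uaddlp/saddlp) of the source vector followed by an ordinary vector add
  // with the accumulator operand.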
12496 case NEON::BI__builtin_neon_vpadal_v:
12497 case NEON::BI__builtin_neon_vpadalq_v: {
12498 unsigned ArgElts = VTy->getNumElements();
12499 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12500 unsigned BitWidth = EltTy->getBitWidth();
12501 auto *ArgTy = llvm::FixedVectorType::get(
12502 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12503 llvm::Type* Tys[2] = { VTy, ArgTy };
12504     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12505     llvm::SmallVector<llvm::Value*, 1> TmpOps;
12506 TmpOps.push_back(Ops[1]);
12507 Function *F = CGM.getIntrinsic(Int, Tys);
12508 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12509 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12510 return Builder.CreateAdd(tmp, addend);
12511 }
12512 case NEON::BI__builtin_neon_vpmin_v:
12513 case NEON::BI__builtin_neon_vpminq_v:
12514 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12515 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12516 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12517 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12518 case NEON::BI__builtin_neon_vpmax_v:
12519 case NEON::BI__builtin_neon_vpmaxq_v:
12520 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12521 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12522 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12523 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12524 case NEON::BI__builtin_neon_vminnm_v:
12525 case NEON::BI__builtin_neon_vminnmq_v:
12526 Int = Intrinsic::aarch64_neon_fminnm;
12527 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12528 case NEON::BI__builtin_neon_vminnmh_f16:
12529 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12530 Int = Intrinsic::aarch64_neon_fminnm;
12531 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12532 case NEON::BI__builtin_neon_vmaxnm_v:
12533 case NEON::BI__builtin_neon_vmaxnmq_v:
12534 Int = Intrinsic::aarch64_neon_fmaxnm;
12535 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12536 case NEON::BI__builtin_neon_vmaxnmh_f16:
12537 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12538 Int = Intrinsic::aarch64_neon_fmaxnm;
12539 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12540 case NEON::BI__builtin_neon_vrecpss_f32: {
12541 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12542 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12543 Ops, "vrecps");
12544 }
12545 case NEON::BI__builtin_neon_vrecpsd_f64:
12546 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12547 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12548 Ops, "vrecps");
12549 case NEON::BI__builtin_neon_vrecpsh_f16:
12550 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12551 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12552 Ops, "vrecps");
12553 case NEON::BI__builtin_neon_vqshrun_n_v:
12554 Int = Intrinsic::aarch64_neon_sqshrun;
12555 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12556 case NEON::BI__builtin_neon_vqrshrun_n_v:
12557 Int = Intrinsic::aarch64_neon_sqrshrun;
12558 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12559 case NEON::BI__builtin_neon_vqshrn_n_v:
12560 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12561 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12562 case NEON::BI__builtin_neon_vrshrn_n_v:
12563 Int = Intrinsic::aarch64_neon_rshrn;
12564 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12565 case NEON::BI__builtin_neon_vqrshrn_n_v:
12566 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12567 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12568 case NEON::BI__builtin_neon_vrndah_f16: {
12569 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12570 Int = Builder.getIsFPConstrained()
12571 ? Intrinsic::experimental_constrained_round
12572 : Intrinsic::round;
12573 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12574 }
12575 case NEON::BI__builtin_neon_vrnda_v:
12576 case NEON::BI__builtin_neon_vrndaq_v: {
12577 Int = Builder.getIsFPConstrained()
12578 ? Intrinsic::experimental_constrained_round
12579 : Intrinsic::round;
12580 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12581 }
12582 case NEON::BI__builtin_neon_vrndih_f16: {
12583 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12584 Int = Builder.getIsFPConstrained()
12585 ? Intrinsic::experimental_constrained_nearbyint
12586 : Intrinsic::nearbyint;
12587 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12588 }
12589 case NEON::BI__builtin_neon_vrndmh_f16: {
12590 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12591 Int = Builder.getIsFPConstrained()
12592 ? Intrinsic::experimental_constrained_floor
12593 : Intrinsic::floor;
12594 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12595 }
12596 case NEON::BI__builtin_neon_vrndm_v:
12597 case NEON::BI__builtin_neon_vrndmq_v: {
12598 Int = Builder.getIsFPConstrained()
12599 ? Intrinsic::experimental_constrained_floor
12600 : Intrinsic::floor;
12601 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12602 }
12603 case NEON::BI__builtin_neon_vrndnh_f16: {
12604 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12605 Int = Builder.getIsFPConstrained()
12606 ? Intrinsic::experimental_constrained_roundeven
12607 : Intrinsic::roundeven;
12608 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12609 }
12610 case NEON::BI__builtin_neon_vrndn_v:
12611 case NEON::BI__builtin_neon_vrndnq_v: {
12612 Int = Builder.getIsFPConstrained()
12613 ? Intrinsic::experimental_constrained_roundeven
12614 : Intrinsic::roundeven;
12615 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12616 }
12617 case NEON::BI__builtin_neon_vrndns_f32: {
12618 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12619 Int = Builder.getIsFPConstrained()
12620 ? Intrinsic::experimental_constrained_roundeven
12621 : Intrinsic::roundeven;
12622 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12623 }
12624 case NEON::BI__builtin_neon_vrndph_f16: {
12625 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12626 Int = Builder.getIsFPConstrained()
12627 ? Intrinsic::experimental_constrained_ceil
12628 : Intrinsic::ceil;
12629 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12630 }
12631 case NEON::BI__builtin_neon_vrndp_v:
12632 case NEON::BI__builtin_neon_vrndpq_v: {
12633 Int = Builder.getIsFPConstrained()
12634 ? Intrinsic::experimental_constrained_ceil
12635 : Intrinsic::ceil;
12636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12637 }
12638 case NEON::BI__builtin_neon_vrndxh_f16: {
12639 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12640 Int = Builder.getIsFPConstrained()
12641 ? Intrinsic::experimental_constrained_rint
12642 : Intrinsic::rint;
12643 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12644 }
12645 case NEON::BI__builtin_neon_vrndx_v:
12646 case NEON::BI__builtin_neon_vrndxq_v: {
12647 Int = Builder.getIsFPConstrained()
12648 ? Intrinsic::experimental_constrained_rint
12649 : Intrinsic::rint;
12650 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12651 }
12652 case NEON::BI__builtin_neon_vrndh_f16: {
12653 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12654 Int = Builder.getIsFPConstrained()
12655 ? Intrinsic::experimental_constrained_trunc
12656 : Intrinsic::trunc;
12657 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12658 }
12659 case NEON::BI__builtin_neon_vrnd32x_f32:
12660 case NEON::BI__builtin_neon_vrnd32xq_f32:
12661 case NEON::BI__builtin_neon_vrnd32x_f64:
12662 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12663 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12664 Int = Intrinsic::aarch64_neon_frint32x;
12665 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12666 }
12667 case NEON::BI__builtin_neon_vrnd32z_f32:
12668 case NEON::BI__builtin_neon_vrnd32zq_f32:
12669 case NEON::BI__builtin_neon_vrnd32z_f64:
12670 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12671 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12672 Int = Intrinsic::aarch64_neon_frint32z;
12673 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12674 }
12675 case NEON::BI__builtin_neon_vrnd64x_f32:
12676 case NEON::BI__builtin_neon_vrnd64xq_f32:
12677 case NEON::BI__builtin_neon_vrnd64x_f64:
12678 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12679 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12680 Int = Intrinsic::aarch64_neon_frint64x;
12681 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12682 }
12683 case NEON::BI__builtin_neon_vrnd64z_f32:
12684 case NEON::BI__builtin_neon_vrnd64zq_f32:
12685 case NEON::BI__builtin_neon_vrnd64z_f64:
12686 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12687 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12688 Int = Intrinsic::aarch64_neon_frint64z;
12689 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12690 }
12691 case NEON::BI__builtin_neon_vrnd_v:
12692 case NEON::BI__builtin_neon_vrndq_v: {
12693 Int = Builder.getIsFPConstrained()
12694 ? Intrinsic::experimental_constrained_trunc
12695 : Intrinsic::trunc;
12696 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12697 }
12698 case NEON::BI__builtin_neon_vcvt_f64_v:
12699 case NEON::BI__builtin_neon_vcvtq_f64_v:
12700 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12701 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12702 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12703 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12704 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12705 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12706 "unexpected vcvt_f64_f32 builtin");
12707 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12708 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12709
12710 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12711 }
12712 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12713 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12714 "unexpected vcvt_f32_f64 builtin");
12715 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12716 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12717
12718 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12719 }
12720 case NEON::BI__builtin_neon_vcvt_s32_v:
12721 case NEON::BI__builtin_neon_vcvt_u32_v:
12722 case NEON::BI__builtin_neon_vcvt_s64_v:
12723 case NEON::BI__builtin_neon_vcvt_u64_v:
12724 case NEON::BI__builtin_neon_vcvt_s16_f16:
12725 case NEON::BI__builtin_neon_vcvt_u16_f16:
12726 case NEON::BI__builtin_neon_vcvtq_s32_v:
12727 case NEON::BI__builtin_neon_vcvtq_u32_v:
12728 case NEON::BI__builtin_neon_vcvtq_s64_v:
12729 case NEON::BI__builtin_neon_vcvtq_u64_v:
12730 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12731 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12732 Int =
12733 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12734 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12735 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12736 }
12737 case NEON::BI__builtin_neon_vcvta_s16_f16:
12738 case NEON::BI__builtin_neon_vcvta_u16_f16:
12739 case NEON::BI__builtin_neon_vcvta_s32_v:
12740 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12741 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12742 case NEON::BI__builtin_neon_vcvta_u32_v:
12743 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12744 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12745 case NEON::BI__builtin_neon_vcvta_s64_v:
12746 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12747 case NEON::BI__builtin_neon_vcvta_u64_v:
12748 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12749 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12750 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12751 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12752 }
12753 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12754 case NEON::BI__builtin_neon_vcvtm_s32_v:
12755 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12756 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12757 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12758 case NEON::BI__builtin_neon_vcvtm_u32_v:
12759 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12760 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12761 case NEON::BI__builtin_neon_vcvtm_s64_v:
12762 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12763 case NEON::BI__builtin_neon_vcvtm_u64_v:
12764 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12765 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12766 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12767 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12768 }
12769 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12770 case NEON::BI__builtin_neon_vcvtn_s32_v:
12771 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12772 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12773 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12774 case NEON::BI__builtin_neon_vcvtn_u32_v:
12775 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12776 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12777 case NEON::BI__builtin_neon_vcvtn_s64_v:
12778 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12779 case NEON::BI__builtin_neon_vcvtn_u64_v:
12780 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12781 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12782 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12783 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12784 }
12785 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12786 case NEON::BI__builtin_neon_vcvtp_s32_v:
12787 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12788 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12789 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12790 case NEON::BI__builtin_neon_vcvtp_u32_v:
12791 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12792 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12793 case NEON::BI__builtin_neon_vcvtp_s64_v:
12794 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12795 case NEON::BI__builtin_neon_vcvtp_u64_v:
12796 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12797 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12798 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12799 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12800 }
12801 case NEON::BI__builtin_neon_vmulx_v:
12802 case NEON::BI__builtin_neon_vmulxq_v: {
12803 Int = Intrinsic::aarch64_neon_fmulx;
12804 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12805 }
12806 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12807 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12808 // vmulx_lane should be mapped to Neon scalar mulx after
12809 // extracting the scalar element
12810 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12811 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12812 Ops.pop_back();
12813 Int = Intrinsic::aarch64_neon_fmulx;
12814 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12815 }
12816 case NEON::BI__builtin_neon_vmul_lane_v:
12817 case NEON::BI__builtin_neon_vmul_laneq_v: {
12818 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12819 bool Quad = false;
12820 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12821 Quad = true;
12822 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12823     llvm::FixedVectorType *VTy =
12824         GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12825 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12826 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12827 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12828 return Builder.CreateBitCast(Result, Ty);
12829 }
12830 case NEON::BI__builtin_neon_vnegd_s64:
12831 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12832 case NEON::BI__builtin_neon_vnegh_f16:
12833 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12834 case NEON::BI__builtin_neon_vpmaxnm_v:
12835 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12836 Int = Intrinsic::aarch64_neon_fmaxnmp;
12837 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12838 }
12839 case NEON::BI__builtin_neon_vpminnm_v:
12840 case NEON::BI__builtin_neon_vpminnmq_v: {
12841 Int = Intrinsic::aarch64_neon_fminnmp;
12842 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12843 }
12844 case NEON::BI__builtin_neon_vsqrth_f16: {
12845 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12846 Int = Builder.getIsFPConstrained()
12847 ? Intrinsic::experimental_constrained_sqrt
12848 : Intrinsic::sqrt;
12849 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12850 }
12851 case NEON::BI__builtin_neon_vsqrt_v:
12852 case NEON::BI__builtin_neon_vsqrtq_v: {
12853 Int = Builder.getIsFPConstrained()
12854 ? Intrinsic::experimental_constrained_sqrt
12855 : Intrinsic::sqrt;
12856 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12857 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12858 }
12859 case NEON::BI__builtin_neon_vrbit_v:
12860 case NEON::BI__builtin_neon_vrbitq_v: {
12861 Int = Intrinsic::bitreverse;
12862 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12863 }
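  // Note: the across-vector reduction builtins below all follow the same
  // pattern: call the corresponding aarch64.neon reduction intrinsic with an
  // explicit (wider) scalar result type, then truncate the result back down
  // to the return type expected by the builtin.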
12864 case NEON::BI__builtin_neon_vaddv_u8:
12865 // FIXME: These are handled by the AArch64 scalar code.
12866 usgn = true;
12867 [[fallthrough]];
12868 case NEON::BI__builtin_neon_vaddv_s8: {
12869 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12870 Ty = Int32Ty;
12871 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12872 llvm::Type *Tys[2] = { Ty, VTy };
12873 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12874 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12875 return Builder.CreateTrunc(Ops[0], Int8Ty);
12876 }
12877 case NEON::BI__builtin_neon_vaddv_u16:
12878 usgn = true;
12879 [[fallthrough]];
12880 case NEON::BI__builtin_neon_vaddv_s16: {
12881 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12882 Ty = Int32Ty;
12883 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12884 llvm::Type *Tys[2] = { Ty, VTy };
12885 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12886 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12887 return Builder.CreateTrunc(Ops[0], Int16Ty);
12888 }
12889 case NEON::BI__builtin_neon_vaddvq_u8:
12890 usgn = true;
12891 [[fallthrough]];
12892 case NEON::BI__builtin_neon_vaddvq_s8: {
12893 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12894 Ty = Int32Ty;
12895 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12896 llvm::Type *Tys[2] = { Ty, VTy };
12897 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12898 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12899 return Builder.CreateTrunc(Ops[0], Int8Ty);
12900 }
12901 case NEON::BI__builtin_neon_vaddvq_u16:
12902 usgn = true;
12903 [[fallthrough]];
12904 case NEON::BI__builtin_neon_vaddvq_s16: {
12905 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12906 Ty = Int32Ty;
12907 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12908 llvm::Type *Tys[2] = { Ty, VTy };
12909 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12910 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12911 return Builder.CreateTrunc(Ops[0], Int16Ty);
12912 }
12913 case NEON::BI__builtin_neon_vmaxv_u8: {
12914 Int = Intrinsic::aarch64_neon_umaxv;
12915 Ty = Int32Ty;
12916 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12917 llvm::Type *Tys[2] = { Ty, VTy };
12918 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12919 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12920 return Builder.CreateTrunc(Ops[0], Int8Ty);
12921 }
12922 case NEON::BI__builtin_neon_vmaxv_u16: {
12923 Int = Intrinsic::aarch64_neon_umaxv;
12924 Ty = Int32Ty;
12925 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12926 llvm::Type *Tys[2] = { Ty, VTy };
12927 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12928 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12929 return Builder.CreateTrunc(Ops[0], Int16Ty);
12930 }
12931 case NEON::BI__builtin_neon_vmaxvq_u8: {
12932 Int = Intrinsic::aarch64_neon_umaxv;
12933 Ty = Int32Ty;
12934 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12935 llvm::Type *Tys[2] = { Ty, VTy };
12936 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12937 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12938 return Builder.CreateTrunc(Ops[0], Int8Ty);
12939 }
12940 case NEON::BI__builtin_neon_vmaxvq_u16: {
12941 Int = Intrinsic::aarch64_neon_umaxv;
12942 Ty = Int32Ty;
12943 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12944 llvm::Type *Tys[2] = { Ty, VTy };
12945 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12946 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12947 return Builder.CreateTrunc(Ops[0], Int16Ty);
12948 }
12949 case NEON::BI__builtin_neon_vmaxv_s8: {
12950 Int = Intrinsic::aarch64_neon_smaxv;
12951 Ty = Int32Ty;
12952 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12953 llvm::Type *Tys[2] = { Ty, VTy };
12954 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12955 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12956 return Builder.CreateTrunc(Ops[0], Int8Ty);
12957 }
12958 case NEON::BI__builtin_neon_vmaxv_s16: {
12959 Int = Intrinsic::aarch64_neon_smaxv;
12960 Ty = Int32Ty;
12961 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12962 llvm::Type *Tys[2] = { Ty, VTy };
12963 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12964 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12965 return Builder.CreateTrunc(Ops[0], Int16Ty);
12966 }
12967 case NEON::BI__builtin_neon_vmaxvq_s8: {
12968 Int = Intrinsic::aarch64_neon_smaxv;
12969 Ty = Int32Ty;
12970 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12971 llvm::Type *Tys[2] = { Ty, VTy };
12972 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12973 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12974 return Builder.CreateTrunc(Ops[0], Int8Ty);
12975 }
12976 case NEON::BI__builtin_neon_vmaxvq_s16: {
12977 Int = Intrinsic::aarch64_neon_smaxv;
12978 Ty = Int32Ty;
12979 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12980 llvm::Type *Tys[2] = { Ty, VTy };
12981 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12982 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12983 return Builder.CreateTrunc(Ops[0], Int16Ty);
12984 }
12985 case NEON::BI__builtin_neon_vmaxv_f16: {
12986 Int = Intrinsic::aarch64_neon_fmaxv;
12987 Ty = HalfTy;
12988 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12989 llvm::Type *Tys[2] = { Ty, VTy };
12990 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12991 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12992 return Builder.CreateTrunc(Ops[0], HalfTy);
12993 }
12994 case NEON::BI__builtin_neon_vmaxvq_f16: {
12995 Int = Intrinsic::aarch64_neon_fmaxv;
12996 Ty = HalfTy;
12997 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12998 llvm::Type *Tys[2] = { Ty, VTy };
12999 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13000 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13001 return Builder.CreateTrunc(Ops[0], HalfTy);
13002 }
13003 case NEON::BI__builtin_neon_vminv_u8: {
13004 Int = Intrinsic::aarch64_neon_uminv;
13005 Ty = Int32Ty;
13006 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13007 llvm::Type *Tys[2] = { Ty, VTy };
13008 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13009 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13010 return Builder.CreateTrunc(Ops[0], Int8Ty);
13011 }
13012 case NEON::BI__builtin_neon_vminv_u16: {
13013 Int = Intrinsic::aarch64_neon_uminv;
13014 Ty = Int32Ty;
13015 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13016 llvm::Type *Tys[2] = { Ty, VTy };
13017 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13018 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13019 return Builder.CreateTrunc(Ops[0], Int16Ty);
13020 }
13021 case NEON::BI__builtin_neon_vminvq_u8: {
13022 Int = Intrinsic::aarch64_neon_uminv;
13023 Ty = Int32Ty;
13024 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13025 llvm::Type *Tys[2] = { Ty, VTy };
13026 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13027 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13028 return Builder.CreateTrunc(Ops[0], Int8Ty);
13029 }
13030 case NEON::BI__builtin_neon_vminvq_u16: {
13031 Int = Intrinsic::aarch64_neon_uminv;
13032 Ty = Int32Ty;
13033 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13034 llvm::Type *Tys[2] = { Ty, VTy };
13035 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13036 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13037 return Builder.CreateTrunc(Ops[0], Int16Ty);
13038 }
13039 case NEON::BI__builtin_neon_vminv_s8: {
13040 Int = Intrinsic::aarch64_neon_sminv;
13041 Ty = Int32Ty;
13042 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13043 llvm::Type *Tys[2] = { Ty, VTy };
13044 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13045 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13046 return Builder.CreateTrunc(Ops[0], Int8Ty);
13047 }
13048 case NEON::BI__builtin_neon_vminv_s16: {
13049 Int = Intrinsic::aarch64_neon_sminv;
13050 Ty = Int32Ty;
13051 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13052 llvm::Type *Tys[2] = { Ty, VTy };
13053 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13054 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13055 return Builder.CreateTrunc(Ops[0], Int16Ty);
13056 }
13057 case NEON::BI__builtin_neon_vminvq_s8: {
13058 Int = Intrinsic::aarch64_neon_sminv;
13059 Ty = Int32Ty;
13060 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13061 llvm::Type *Tys[2] = { Ty, VTy };
13062 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13063 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13064 return Builder.CreateTrunc(Ops[0], Int8Ty);
13065 }
13066 case NEON::BI__builtin_neon_vminvq_s16: {
13067 Int = Intrinsic::aarch64_neon_sminv;
13068 Ty = Int32Ty;
13069 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13070 llvm::Type *Tys[2] = { Ty, VTy };
13071 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13072 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13073 return Builder.CreateTrunc(Ops[0], Int16Ty);
13074 }
13075 case NEON::BI__builtin_neon_vminv_f16: {
13076 Int = Intrinsic::aarch64_neon_fminv;
13077 Ty = HalfTy;
13078 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13079 llvm::Type *Tys[2] = { Ty, VTy };
13080 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13081 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13082 return Builder.CreateTrunc(Ops[0], HalfTy);
13083 }
13084 case NEON::BI__builtin_neon_vminvq_f16: {
13085 Int = Intrinsic::aarch64_neon_fminv;
13086 Ty = HalfTy;
13087 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13088 llvm::Type *Tys[2] = { Ty, VTy };
13089 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13090 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13091 return Builder.CreateTrunc(Ops[0], HalfTy);
13092 }
13093 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13094 Int = Intrinsic::aarch64_neon_fmaxnmv;
13095 Ty = HalfTy;
13096 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13097 llvm::Type *Tys[2] = { Ty, VTy };
13098 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13099 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13100 return Builder.CreateTrunc(Ops[0], HalfTy);
13101 }
13102 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13103 Int = Intrinsic::aarch64_neon_fmaxnmv;
13104 Ty = HalfTy;
13105 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13106 llvm::Type *Tys[2] = { Ty, VTy };
13107 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13108 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13109 return Builder.CreateTrunc(Ops[0], HalfTy);
13110 }
13111 case NEON::BI__builtin_neon_vminnmv_f16: {
13112 Int = Intrinsic::aarch64_neon_fminnmv;
13113 Ty = HalfTy;
13114 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13115 llvm::Type *Tys[2] = { Ty, VTy };
13116 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13117 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13118 return Builder.CreateTrunc(Ops[0], HalfTy);
13119 }
13120 case NEON::BI__builtin_neon_vminnmvq_f16: {
13121 Int = Intrinsic::aarch64_neon_fminnmv;
13122 Ty = HalfTy;
13123 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13124 llvm::Type *Tys[2] = { Ty, VTy };
13125 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13126 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13127 return Builder.CreateTrunc(Ops[0], HalfTy);
13128 }
13129 case NEON::BI__builtin_neon_vmul_n_f64: {
13130 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13131 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13132 return Builder.CreateFMul(Ops[0], RHS);
13133 }
13134 case NEON::BI__builtin_neon_vaddlv_u8: {
13135 Int = Intrinsic::aarch64_neon_uaddlv;
13136 Ty = Int32Ty;
13137 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13138 llvm::Type *Tys[2] = { Ty, VTy };
13139 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13140 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13141 return Builder.CreateTrunc(Ops[0], Int16Ty);
13142 }
13143 case NEON::BI__builtin_neon_vaddlv_u16: {
13144 Int = Intrinsic::aarch64_neon_uaddlv;
13145 Ty = Int32Ty;
13146 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13147 llvm::Type *Tys[2] = { Ty, VTy };
13148 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13149 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13150 }
13151 case NEON::BI__builtin_neon_vaddlvq_u8: {
13152 Int = Intrinsic::aarch64_neon_uaddlv;
13153 Ty = Int32Ty;
13154 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13155 llvm::Type *Tys[2] = { Ty, VTy };
13156 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13157 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13158 return Builder.CreateTrunc(Ops[0], Int16Ty);
13159 }
13160 case NEON::BI__builtin_neon_vaddlvq_u16: {
13161 Int = Intrinsic::aarch64_neon_uaddlv;
13162 Ty = Int32Ty;
13163 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13164 llvm::Type *Tys[2] = { Ty, VTy };
13165 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13166 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13167 }
13168 case NEON::BI__builtin_neon_vaddlv_s8: {
13169 Int = Intrinsic::aarch64_neon_saddlv;
13170 Ty = Int32Ty;
13171 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13172 llvm::Type *Tys[2] = { Ty, VTy };
13173 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13174 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13175 return Builder.CreateTrunc(Ops[0], Int16Ty);
13176 }
13177 case NEON::BI__builtin_neon_vaddlv_s16: {
13178 Int = Intrinsic::aarch64_neon_saddlv;
13179 Ty = Int32Ty;
13180 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13181 llvm::Type *Tys[2] = { Ty, VTy };
13182 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13183 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13184 }
13185 case NEON::BI__builtin_neon_vaddlvq_s8: {
13186 Int = Intrinsic::aarch64_neon_saddlv;
13187 Ty = Int32Ty;
13188 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13189 llvm::Type *Tys[2] = { Ty, VTy };
13190 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13191 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13192 return Builder.CreateTrunc(Ops[0], Int16Ty);
13193 }
13194 case NEON::BI__builtin_neon_vaddlvq_s16: {
13195 Int = Intrinsic::aarch64_neon_saddlv;
13196 Ty = Int32Ty;
13197 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13198 llvm::Type *Tys[2] = { Ty, VTy };
13199 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13200 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13201 }
13202 case NEON::BI__builtin_neon_vsri_n_v:
13203 case NEON::BI__builtin_neon_vsriq_n_v: {
13204 Int = Intrinsic::aarch64_neon_vsri;
13205 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13206 return EmitNeonCall(Intrin, Ops, "vsri_n");
13207 }
13208 case NEON::BI__builtin_neon_vsli_n_v:
13209 case NEON::BI__builtin_neon_vsliq_n_v: {
13210 Int = Intrinsic::aarch64_neon_vsli;
13211 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13212 return EmitNeonCall(Intrin, Ops, "vsli_n");
13213 }
13214 case NEON::BI__builtin_neon_vsra_n_v:
13215 case NEON::BI__builtin_neon_vsraq_n_v:
13216 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13217 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13218 return Builder.CreateAdd(Ops[0], Ops[1]);
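  // Note: for vrsra_n the rounding right shift is performed with the
  // urshl/srshl intrinsic; the trailing (1, true) arguments to EmitNeonCall
  // splat and negate the shift amount so the "left shift" acts as a rounding
  // right shift, and the shifted value is then accumulated into Ops[0].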
13219 case NEON::BI__builtin_neon_vrsra_n_v:
13220 case NEON::BI__builtin_neon_vrsraq_n_v: {
13221     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13222     llvm::SmallVector<llvm::Value*, 2> TmpOps;
13223 TmpOps.push_back(Ops[1]);
13224 TmpOps.push_back(Ops[2]);
13225 Function* F = CGM.getIntrinsic(Int, Ty);
13226 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13227 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13228 return Builder.CreateAdd(Ops[0], tmp);
13229 }
13230 case NEON::BI__builtin_neon_vld1_v:
13231 case NEON::BI__builtin_neon_vld1q_v: {
13232 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13233 }
13234 case NEON::BI__builtin_neon_vst1_v:
13235 case NEON::BI__builtin_neon_vst1q_v:
13236 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13237 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13238 case NEON::BI__builtin_neon_vld1_lane_v:
13239 case NEON::BI__builtin_neon_vld1q_lane_v: {
13240 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13241 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13242 PtrOp0.getAlignment());
13243 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13244 }
13245 case NEON::BI__builtin_neon_vldap1_lane_s64:
13246 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13247 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13248 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13249 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13250 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13251 Ops[0] = LI;
13252 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13253 }
13254 case NEON::BI__builtin_neon_vld1_dup_v:
13255 case NEON::BI__builtin_neon_vld1q_dup_v: {
13256 Value *V = PoisonValue::get(Ty);
13257 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13258 PtrOp0.getAlignment());
13259 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13260 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13261 return EmitNeonSplat(Ops[0], CI);
13262 }
13263 case NEON::BI__builtin_neon_vst1_lane_v:
13264 case NEON::BI__builtin_neon_vst1q_lane_v:
13265 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13266 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13267 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13268 case NEON::BI__builtin_neon_vstl1_lane_s64:
13269 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13270 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13271 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13272 llvm::StoreInst *SI =
13273 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13274 SI->setAtomic(llvm::AtomicOrdering::Release);
13275 return SI;
13276 }
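  // Note: the structured load builtins below (vld2/vld3/vld4 and their dup
  // and lane variants) call the aarch64.neon.ldN* intrinsics, which return a
  // struct of vectors; that struct is then stored through the result pointer
  // passed in Ops[0].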
13277 case NEON::BI__builtin_neon_vld2_v:
13278 case NEON::BI__builtin_neon_vld2q_v: {
13279 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13280 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13281 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13282 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13283 }
13284 case NEON::BI__builtin_neon_vld3_v:
13285 case NEON::BI__builtin_neon_vld3q_v: {
13286 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13287 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13288 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13289 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13290 }
13291 case NEON::BI__builtin_neon_vld4_v:
13292 case NEON::BI__builtin_neon_vld4q_v: {
13293 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13294 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13295 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13296 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13297 }
13298 case NEON::BI__builtin_neon_vld2_dup_v:
13299 case NEON::BI__builtin_neon_vld2q_dup_v: {
13300 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13301 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13302 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13303 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13304 }
13305 case NEON::BI__builtin_neon_vld3_dup_v:
13306 case NEON::BI__builtin_neon_vld3q_dup_v: {
13307 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13308 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13309 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13310 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13311 }
13312 case NEON::BI__builtin_neon_vld4_dup_v:
13313 case NEON::BI__builtin_neon_vld4q_dup_v: {
13314 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13315 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13316 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13317 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13318 }
13319 case NEON::BI__builtin_neon_vld2_lane_v:
13320 case NEON::BI__builtin_neon_vld2q_lane_v: {
13321 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13322 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13323 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13324 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13325 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13326 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13327 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13328 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13329 }
13330 case NEON::BI__builtin_neon_vld3_lane_v:
13331 case NEON::BI__builtin_neon_vld3q_lane_v: {
13332 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13333 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13334 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13335 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13336 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13337 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13338 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13339 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13340 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13341 }
13342 case NEON::BI__builtin_neon_vld4_lane_v:
13343 case NEON::BI__builtin_neon_vld4q_lane_v: {
13344 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13345 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13346 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13347 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13348 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13349 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13350 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13351 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13352 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13353 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13354 }
13355 case NEON::BI__builtin_neon_vst2_v:
13356 case NEON::BI__builtin_neon_vst2q_v: {
13357 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13358 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13359 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13360 Ops, "");
13361 }
13362 case NEON::BI__builtin_neon_vst2_lane_v:
13363 case NEON::BI__builtin_neon_vst2q_lane_v: {
13364 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13365 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13366 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13367 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13368 Ops, "");
13369 }
13370 case NEON::BI__builtin_neon_vst3_v:
13371 case NEON::BI__builtin_neon_vst3q_v: {
13372 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13373 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13374 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13375 Ops, "");
13376 }
13377 case NEON::BI__builtin_neon_vst3_lane_v:
13378 case NEON::BI__builtin_neon_vst3q_lane_v: {
13379 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13380 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13381 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13382 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13383 Ops, "");
13384 }
13385 case NEON::BI__builtin_neon_vst4_v:
13386 case NEON::BI__builtin_neon_vst4q_v: {
13387 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13388 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13389 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13390 Ops, "");
13391 }
13392 case NEON::BI__builtin_neon_vst4_lane_v:
13393 case NEON::BI__builtin_neon_vst4q_lane_v: {
13394 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13395 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13396 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13397 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13398 Ops, "");
13399 }
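  // Note: vtrn/vuzp/vzip are emitted as two shufflevector operations, one per
  // result register, each stored to consecutive halves of the destination
  // buffer addressed by Ops[0].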
13400 case NEON::BI__builtin_neon_vtrn_v:
13401 case NEON::BI__builtin_neon_vtrnq_v: {
13402 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13403 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13404 Value *SV = nullptr;
13405
13406 for (unsigned vi = 0; vi != 2; ++vi) {
13407 SmallVector<int, 16> Indices;
13408 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13409 Indices.push_back(i+vi);
13410 Indices.push_back(i+e+vi);
13411 }
13412 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13413 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13414 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13415 }
13416 return SV;
13417 }
13418 case NEON::BI__builtin_neon_vuzp_v:
13419 case NEON::BI__builtin_neon_vuzpq_v: {
13420 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13421 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13422 Value *SV = nullptr;
13423
13424 for (unsigned vi = 0; vi != 2; ++vi) {
13425 SmallVector<int, 16> Indices;
13426 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13427 Indices.push_back(2*i+vi);
13428
13429 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13430 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13431 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13432 }
13433 return SV;
13434 }
13435 case NEON::BI__builtin_neon_vzip_v:
13436 case NEON::BI__builtin_neon_vzipq_v: {
13437 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13438 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13439 Value *SV = nullptr;
13440
13441 for (unsigned vi = 0; vi != 2; ++vi) {
13442 SmallVector<int, 16> Indices;
13443 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13444 Indices.push_back((i + vi*e) >> 1);
13445 Indices.push_back(((i + vi*e) >> 1)+e);
13446 }
13447 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13448 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13449 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13450 }
13451 return SV;
13452 }
13453 case NEON::BI__builtin_neon_vqtbl1q_v: {
13454 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13455 Ops, "vtbl1");
13456 }
13457 case NEON::BI__builtin_neon_vqtbl2q_v: {
13458 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13459 Ops, "vtbl2");
13460 }
13461 case NEON::BI__builtin_neon_vqtbl3q_v: {
13462 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13463 Ops, "vtbl3");
13464 }
13465 case NEON::BI__builtin_neon_vqtbl4q_v: {
13466 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13467 Ops, "vtbl4");
13468 }
13469 case NEON::BI__builtin_neon_vqtbx1q_v: {
13470 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13471 Ops, "vtbx1");
13472 }
13473 case NEON::BI__builtin_neon_vqtbx2q_v: {
13474 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13475 Ops, "vtbx2");
13476 }
13477 case NEON::BI__builtin_neon_vqtbx3q_v: {
13478 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13479 Ops, "vtbx3");
13480 }
13481 case NEON::BI__builtin_neon_vqtbx4q_v: {
13482 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13483 Ops, "vtbx4");
13484 }
13485 case NEON::BI__builtin_neon_vsqadd_v:
13486 case NEON::BI__builtin_neon_vsqaddq_v: {
13487 Int = Intrinsic::aarch64_neon_usqadd;
13488 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13489 }
13490 case NEON::BI__builtin_neon_vuqadd_v:
13491 case NEON::BI__builtin_neon_vuqaddq_v: {
13492 Int = Intrinsic::aarch64_neon_suqadd;
13493 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13494 }
13495 }
13496}
13497
13498Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13499 const CallExpr *E) {
13500 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13501 BuiltinID == BPF::BI__builtin_btf_type_id ||
13502 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13503 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13504 "unexpected BPF builtin");
13505
13506  // A sequence number, injected into IR builtin functions, to
13507  // prevent CSE, given that the only difference between the functions
13508  // may be the debuginfo metadata.
13509 static uint32_t BuiltinSeqNum;
13510
13511 switch (BuiltinID) {
13512 default:
13513 llvm_unreachable("Unexpected BPF builtin");
13514 case BPF::BI__builtin_preserve_field_info: {
13515 const Expr *Arg = E->getArg(0);
13516 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13517
13518 if (!getDebugInfo()) {
13519 CGM.Error(E->getExprLoc(),
13520 "using __builtin_preserve_field_info() without -g");
13521 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13522 : EmitLValue(Arg).emitRawPointer(*this);
13523 }
13524
13525 // Enable underlying preserve_*_access_index() generation.
13526 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13527 IsInPreservedAIRegion = true;
13528 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13529 : EmitLValue(Arg).emitRawPointer(*this);
13530 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13531
13532 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13533 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13534
13535    // Build the IR for the preserve_field_info intrinsic.
13536 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13537 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13538 {FieldAddr->getType()});
13539 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13540 }
13541 case BPF::BI__builtin_btf_type_id:
13542 case BPF::BI__builtin_preserve_type_info: {
13543 if (!getDebugInfo()) {
13544 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13545 return nullptr;
13546 }
13547
13548 const Expr *Arg0 = E->getArg(0);
13549 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13550 Arg0->getType(), Arg0->getExprLoc());
13551
13552 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13553 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13554 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13555
13556 llvm::Function *FnDecl;
13557 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13558 FnDecl = llvm::Intrinsic::getDeclaration(
13559 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13560 else
13561 FnDecl = llvm::Intrinsic::getDeclaration(
13562 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13563 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13564 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13565 return Fn;
13566 }
13567 case BPF::BI__builtin_preserve_enum_value: {
13568 if (!getDebugInfo()) {
13569 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13570 return nullptr;
13571 }
13572
13573 const Expr *Arg0 = E->getArg(0);
13574 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13575 Arg0->getType(), Arg0->getExprLoc());
13576
13577 // Find enumerator
13578 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13579 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13580 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13581 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13582
13583 auto InitVal = Enumerator->getInitVal();
13584 std::string InitValStr;
13585 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13586 InitValStr = std::to_string(InitVal.getSExtValue());
13587 else
13588 InitValStr = std::to_string(InitVal.getZExtValue());
13589 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13590 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13591
13592 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13593 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13594 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13595
13596 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13597 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13598 CallInst *Fn =
13599 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13600 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13601 return Fn;
13602 }
13603 }
13604}
13605
13606llvm::Value *CodeGenFunction::
13607BuildVector(ArrayRef<llvm::Value*> Ops) {
13608 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13609 "Not a power-of-two sized vector!");
13610 bool AllConstants = true;
13611 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13612 AllConstants &= isa<Constant>(Ops[i]);
13613
13614 // If this is a constant vector, create a ConstantVector.
13615   if (AllConstants) {
13616     SmallVector<llvm::Constant*, 16> CstOps;
13617 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13618 CstOps.push_back(cast<Constant>(Ops[i]));
13619 return llvm::ConstantVector::get(CstOps);
13620 }
13621
13622 // Otherwise, insertelement the values to build the vector.
13623 Value *Result = llvm::PoisonValue::get(
13624 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13625
13626 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13627 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13628
13629 return Result;
13630}
13631
13632// Convert the mask from an integer type to a vector of i1.
13633static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13634 unsigned NumElts) {
13635
13636 auto *MaskTy = llvm::FixedVectorType::get(
13637 CGF.Builder.getInt1Ty(),
13638 cast<IntegerType>(Mask->getType())->getBitWidth());
13639 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13640
13641 // If we have less than 8 elements, then the starting mask was an i8 and
13642 // we need to extract down to the right number of elements.
13643 if (NumElts < 8) {
13644 int Indices[4];
13645 for (unsigned i = 0; i != NumElts; ++i)
13646 Indices[i] = i;
13647 MaskVec = CGF.Builder.CreateShuffleVector(
13648 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13649 }
13650 return MaskVec;
13651}
13652
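// Note: the X86 masked load/store/compress/expand helpers below share one
// shape: the integer mask operand is first converted to an <N x i1> vector
// with getMaskVecValue and then fed to the corresponding masked intrinsic.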
13653static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13654                                 Align Alignment) {
13655 Value *Ptr = Ops[0];
13656
13657 Value *MaskVec = getMaskVecValue(
13658 CGF, Ops[2],
13659 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13660
13661 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13662}
13663
13664static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13665                                Align Alignment) {
13666 llvm::Type *Ty = Ops[1]->getType();
13667 Value *Ptr = Ops[0];
13668
13669 Value *MaskVec = getMaskVecValue(
13670 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13671
13672 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13673}
13674
13675static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13676                                ArrayRef<Value *> Ops) {
13677 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13678 Value *Ptr = Ops[0];
13679
13680 Value *MaskVec = getMaskVecValue(
13681 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13682
13683 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13684 ResultTy);
13685 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13686}
13687
13688static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13689                                    ArrayRef<Value *> Ops,
13690                                    bool IsCompress) {
13691 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13692
13693 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13694
13695 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13696 : Intrinsic::x86_avx512_mask_expand;
13697 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13698 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13699}
13700
13701static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13702 ArrayRef<Value *> Ops) {
13703 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13704 Value *Ptr = Ops[0];
13705
13706 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13707
13708 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13709 ResultTy);
13710 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13711}
13712
13713static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13714 ArrayRef<Value *> Ops,
13715 bool InvertLHS = false) {
13716 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13717 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13718 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13719
13720 if (InvertLHS)
13721 LHS = CGF.Builder.CreateNot(LHS);
13722
13723 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13724 Ops[0]->getType());
13725}
13726
13727static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13728 Value *Amt, bool IsRight) {
13729 llvm::Type *Ty = Op0->getType();
13730
13731 // The amount may be a scalar immediate, in which case we create a splat vector.
13732 // Funnel shift amounts are treated as modulo, and the types are all power-of-2, so
13733 // we only care about the lowest log2 bits anyway.
13734 if (Amt->getType() != Ty) {
13735 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13736 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13737 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13738 }
13739
13740 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13741 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13742 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13743}
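// Note that when Op0 == Op1 the funnel shift degenerates to a rotate; e.g.
//   call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %amt)
// rotates each lane left by %amt modulo 32, so rotate-style builtins can
// reuse this helper.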
13744
13745static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13746 bool IsSigned) {
13747 Value *Op0 = Ops[0];
13748 Value *Op1 = Ops[1];
13749 llvm::Type *Ty = Op0->getType();
13750 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13751
13752 CmpInst::Predicate Pred;
13753 switch (Imm) {
13754 case 0x0:
13755 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13756 break;
13757 case 0x1:
13758 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13759 break;
13760 case 0x2:
13761 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13762 break;
13763 case 0x3:
13764 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13765 break;
13766 case 0x4:
13767 Pred = ICmpInst::ICMP_EQ;
13768 break;
13769 case 0x5:
13770 Pred = ICmpInst::ICMP_NE;
13771 break;
13772 case 0x6:
13773 return llvm::Constant::getNullValue(Ty); // FALSE
13774 case 0x7:
13775 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13776 default:
13777 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13778 }
13779
13780 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13781 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13782 return Res;
13783}
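// For example, imm 0x2 with IsSigned emits an 'icmp sgt' whose i1 result is
// sign-extended back to the element type, giving all-ones lanes for true;
// imm 0x6 and 0x7 fold directly to all-zeros / all-ones constant vectors.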
13784
13785static Value *EmitX86Select(CodeGenFunction &CGF,
13786 Value *Mask, Value *Op0, Value *Op1) {
13787
13788 // If the mask is all ones, just return the first argument.
13789 if (const auto *C = dyn_cast<Constant>(Mask))
13790 if (C->isAllOnesValue())
13791 return Op0;
13792
13793 Mask = getMaskVecValue(
13794 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13795
13796 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13797}
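// For a <8 x double> operand with an i8 mask this roughly produces:
//   %m = bitcast i8 %mask to <8 x i1>
//   %r = select <8 x i1> %m, <8 x double> %op0, <8 x double> %op1
// A constant all-ones mask skips the select and returns %op0 directly.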
13798
13799static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13800 Value *Mask, Value *Op0, Value *Op1) {
13801 // If the mask is all ones, just return the first argument.
13802 if (const auto *C = dyn_cast<Constant>(Mask))
13803 if (C->isAllOnesValue())
13804 return Op0;
13805
13806 auto *MaskTy = llvm::FixedVectorType::get(
13807 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13808 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13809 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13810 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13811}
13812
13813static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13814 unsigned NumElts, Value *MaskIn) {
13815 if (MaskIn) {
13816 const auto *C = dyn_cast<Constant>(MaskIn);
13817 if (!C || !C->isAllOnesValue())
13818 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13819 }
13820
13821 if (NumElts < 8) {
13822 int Indices[8];
13823 for (unsigned i = 0; i != NumElts; ++i)
13824 Indices[i] = i;
13825 for (unsigned i = NumElts; i != 8; ++i)
13826 Indices[i] = i % NumElts + NumElts;
13827 Cmp = CGF.Builder.CreateShuffleVector(
13828 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13829 }
13830
13831 return CGF.Builder.CreateBitCast(Cmp,
13832 IntegerType::get(CGF.getLLVMContext(),
13833 std::max(NumElts, 8U)));
13834}
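// Compares with fewer than 8 lanes are widened to 8 i1 lanes by shuffling in
// elements from a zero vector, so e.g. a 2-lane compare still returns an i8
// mask whose upper six bits are clear.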
13835
13836static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13837 bool Signed, ArrayRef<Value *> Ops) {
13838 assert((Ops.size() == 2 || Ops.size() == 4) &&
13839 "Unexpected number of arguments");
13840 unsigned NumElts =
13841 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13842 Value *Cmp;
13843
13844 if (CC == 3) {
13845 Cmp = Constant::getNullValue(
13846 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13847 } else if (CC == 7) {
13848 Cmp = Constant::getAllOnesValue(
13849 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13850 } else {
13851 ICmpInst::Predicate Pred;
13852 switch (CC) {
13853 default: llvm_unreachable("Unknown condition code");
13854 case 0: Pred = ICmpInst::ICMP_EQ; break;
13855 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13856 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13857 case 4: Pred = ICmpInst::ICMP_NE; break;
13858 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13859 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13860 }
13861 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13862 }
13863
13864 Value *MaskIn = nullptr;
13865 if (Ops.size() == 4)
13866 MaskIn = Ops[3];
13867
13868 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13869}
13870
13871static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13872 Value *Zero = Constant::getNullValue(In->getType());
13873 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13874}
13875
13876static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13877 ArrayRef<Value *> Ops, bool IsSigned) {
13878 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13879 llvm::Type *Ty = Ops[1]->getType();
13880
13881 Value *Res;
13882 if (Rnd != 4) {
13883 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13884 : Intrinsic::x86_avx512_uitofp_round;
13885 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13886 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13887 } else {
13888 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13889 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13890 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13891 }
13892
13893 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13894}
13895
13896// Lowers X86 FMA intrinsics to IR.
13897static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13898 ArrayRef<Value *> Ops, unsigned BuiltinID,
13899 bool IsAddSub) {
13900
13901 bool Subtract = false;
13902 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13903 switch (BuiltinID) {
13904 default: break;
13905 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13906 Subtract = true;
13907 [[fallthrough]];
13908 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13909 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13910 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13911 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13912 break;
13913 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13914 Subtract = true;
13915 [[fallthrough]];
13916 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13917 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13918 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13919 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13920 break;
13921 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13922 Subtract = true;
13923 [[fallthrough]];
13924 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13925 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13926 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13927 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13928 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13929 Subtract = true;
13930 [[fallthrough]];
13931 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13932 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13933 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13934 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13935 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13936 Subtract = true;
13937 [[fallthrough]];
13938 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13939 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13940 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13941 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13942 break;
13943 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13944 Subtract = true;
13945 [[fallthrough]];
13946 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13947 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13948 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13949 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13950 break;
13951 }
13952
13953 Value *A = Ops[0];
13954 Value *B = Ops[1];
13955 Value *C = Ops[2];
13956
13957 if (Subtract)
13958 C = CGF.Builder.CreateFNeg(C);
13959
13960 Value *Res;
13961
13962 // Only use the generic fma path for _MM_FROUND_CUR_DIRECTION/4 (no rounding); otherwise, or for add/sub forms, call the target-specific intrinsic.
13963 if (IID != Intrinsic::not_intrinsic &&
13964 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13965 IsAddSub)) {
13966 Function *Intr = CGF.CGM.getIntrinsic(IID);
13967 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13968 } else {
13969 llvm::Type *Ty = A->getType();
13970 Function *FMA;
13971 if (CGF.Builder.getIsFPConstrained()) {
13972 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13973 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13974 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13975 } else {
13976 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13977 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13978 }
13979 }
13980
13981 // Handle any required masking.
13982 Value *MaskFalseVal = nullptr;
13983 switch (BuiltinID) {
13984 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13985 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13986 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13987 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13988 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13989 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13990 MaskFalseVal = Ops[0];
13991 break;
13992 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13993 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13994 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13995 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13996 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13997 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13998 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13999 break;
14000 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14001 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14002 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14003 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14004 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14005 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14006 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14007 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14008 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14009 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14010 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14011 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14012 MaskFalseVal = Ops[2];
14013 break;
14014 }
14015
14016 if (MaskFalseVal)
14017 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14018
14019 return Res;
14020}
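// As a rough example, __builtin_ia32_vfmaddps512_mask with rounding
// _MM_FROUND_CUR_DIRECTION lowers to
//   %r = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
// followed by an EmitX86Select against Ops[3] with %a as the false value;
// a non-default rounding operand routes to llvm.x86.avx512.vfmadd.ps.512 instead.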
14021
14022static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14023 MutableArrayRef<Value *> Ops, Value *Upper,
14024 bool ZeroMask = false, unsigned PTIdx = 0,
14025 bool NegAcc = false) {
14026 unsigned Rnd = 4;
14027 if (Ops.size() > 4)
14028 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14029
14030 if (NegAcc)
14031 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14032
14033 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14034 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14035 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14036 Value *Res;
14037 if (Rnd != 4) {
14038 Intrinsic::ID IID;
14039
14040 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14041 case 16:
14042 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14043 break;
14044 case 32:
14045 IID = Intrinsic::x86_avx512_vfmadd_f32;
14046 break;
14047 case 64:
14048 IID = Intrinsic::x86_avx512_vfmadd_f64;
14049 break;
14050 default:
14051 llvm_unreachable("Unexpected size");
14052 }
14053 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14054 {Ops[0], Ops[1], Ops[2], Ops[4]});
14055 } else if (CGF.Builder.getIsFPConstrained()) {
14056 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14057 Function *FMA = CGF.CGM.getIntrinsic(
14058 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14059 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14060 } else {
14061 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14062 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14063 }
14064 // If we have more than 3 arguments, we need to do masking.
14065 if (Ops.size() > 3) {
14066 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14067 : Ops[PTIdx];
14068
14069 // If we negated the accumulator and it's the PassThru value, we need to
14070 // bypass the negate. Conveniently, Upper should be the same thing in this
14071 // case.
14072 if (NegAcc && PTIdx == 2)
14073 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14074
14075 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14076 }
14077 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14078}
14079
14080static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14081 ArrayRef<Value *> Ops) {
14082 llvm::Type *Ty = Ops[0]->getType();
14083 // Arguments have a vXi32 type so cast to vXi64.
14084 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14085 Ty->getPrimitiveSizeInBits() / 64);
14086 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14087 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14088
14089 if (IsSigned) {
14090 // Shift left then arithmetic shift right.
14091 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14092 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14093 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14094 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14095 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14096 } else {
14097 // Clear the upper bits.
14098 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14099 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14100 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14101 }
14102
14103 return CGF.Builder.CreateMul(LHS, RHS);
14104}
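// On little-endian x86 the bitcast to vXi64 means only the even 32-bit
// elements of the original operands participate: the signed form sign-extends
// each lane from its low 32 bits (shl/ashr by 32) before the multiply,
// matching pmuldq, while the unsigned form just masks with 0xffffffff,
// matching pmuludq.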
14105
14106// Emit a masked pternlog intrinsic. This only exists because the header has to
14107// use a macro and we aren't able to pass the input argument to a pternlog
14108// builtin and a select builtin without evaluating it twice.
14109static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14110 ArrayRef<Value *> Ops) {
14111 llvm::Type *Ty = Ops[0]->getType();
14112
14113 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14114 unsigned EltWidth = Ty->getScalarSizeInBits();
14115 Intrinsic::ID IID;
14116 if (VecWidth == 128 && EltWidth == 32)
14117 IID = Intrinsic::x86_avx512_pternlog_d_128;
14118 else if (VecWidth == 256 && EltWidth == 32)
14119 IID = Intrinsic::x86_avx512_pternlog_d_256;
14120 else if (VecWidth == 512 && EltWidth == 32)
14121 IID = Intrinsic::x86_avx512_pternlog_d_512;
14122 else if (VecWidth == 128 && EltWidth == 64)
14123 IID = Intrinsic::x86_avx512_pternlog_q_128;
14124 else if (VecWidth == 256 && EltWidth == 64)
14125 IID = Intrinsic::x86_avx512_pternlog_q_256;
14126 else if (VecWidth == 512 && EltWidth == 64)
14127 IID = Intrinsic::x86_avx512_pternlog_q_512;
14128 else
14129 llvm_unreachable("Unexpected intrinsic");
14130
14131 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14132 Ops.drop_back());
14133 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14134 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14135}
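// The pternlog immediate is an 8-bit truth table indexed by the three input
// bits; for example an imm of 0xE8 computes the bitwise majority of the three
// operands. The helper drops the mask operand for the intrinsic call and
// applies it afterwards via EmitX86Select.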
14136
14137static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14138 llvm::Type *DstTy) {
14139 unsigned NumberOfElements =
14140 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14141 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14142 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14143}
14144
14145Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14146 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14147 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14148 return EmitX86CpuIs(CPUStr);
14149}
14150
14151// Convert F16 halves to floats.
14152static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14153 ArrayRef<Value *> Ops,
14154 llvm::Type *DstTy) {
14155 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14156 "Unknown cvtph2ps intrinsic");
14157
14158 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14159 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14160 Function *F =
14161 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14162 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14163 }
14164
14165 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14166 Value *Src = Ops[0];
14167
14168 // Extract the subvector.
14169 if (NumDstElts !=
14170 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14171 assert(NumDstElts == 4 && "Unexpected vector size");
14172 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14173 }
14174
14175 // Bitcast from vXi16 to vXf16.
14176 auto *HalfTy = llvm::FixedVectorType::get(
14177 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14178 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14179
14180 // Perform the fp-extension.
14181 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14182
14183 if (Ops.size() >= 3)
14184 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14185 return Res;
14186}
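// For the 128-bit form this roughly takes a <8 x i16> source, keeps elements
// 0-3, bitcasts them to <4 x half>, and fpext's to <4 x float>; the masked
// forms then select the result against the passthrough vector in Ops[1].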
14187
14188Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14189
14190 llvm::Type *Int32Ty = Builder.getInt32Ty();
14191
14192 // Matching the struct layout from the compiler-rt/libgcc structure that is
14193 // filled in:
14194 // unsigned int __cpu_vendor;
14195 // unsigned int __cpu_type;
14196 // unsigned int __cpu_subtype;
14197 // unsigned int __cpu_features[1];
14198 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14199 llvm::ArrayType::get(Int32Ty, 1));
14200
14201 // Grab the global __cpu_model.
14202 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14203 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14204
14205 // Calculate the index needed to access the correct field based on the
14206 // range. Also adjust the expected value.
14207 unsigned Index;
14208 unsigned Value;
14209 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14210#define X86_VENDOR(ENUM, STRING) \
14211 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14212#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14213 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14214#define X86_CPU_TYPE(ENUM, STR) \
14215 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14216#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14217 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14218#define X86_CPU_SUBTYPE(ENUM, STR) \
14219 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14220#include "llvm/TargetParser/X86TargetParser.def"
14221 .Default({0, 0});
14222 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14223
14224 // Grab the appropriate field from __cpu_model.
14225 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14226 ConstantInt::get(Int32Ty, Index)};
14227 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14228 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14229 CharUnits::fromQuantity(4));
14230
14231 // Check the value of the field against the requested value.
14232 return Builder.CreateICmpEQ(CpuValue,
14233 llvm::ConstantInt::get(Int32Ty, Value));
14234}
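// For example, __builtin_cpu_is("intel") roughly loads field 0 of __cpu_model
// (__cpu_vendor) and compares it for equality with the corresponding
// llvm::X86 vendor enum value; CPU type and subtype strings select field 1 or
// field 2 instead.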
14235
14236Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14237 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14238 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14239 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14240 return Builder.getFalse();
14241 return EmitX86CpuSupports(FeatureStr);
14242}
14243
14244Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14245 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14246}
14247
14248llvm::Value *
14249CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14250 Value *Result = Builder.getTrue();
14251 if (FeatureMask[0] != 0) {
14252 // Matching the struct layout from the compiler-rt/libgcc structure that is
14253 // filled in:
14254 // unsigned int __cpu_vendor;
14255 // unsigned int __cpu_type;
14256 // unsigned int __cpu_subtype;
14257 // unsigned int __cpu_features[1];
14258 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14259 llvm::ArrayType::get(Int32Ty, 1));
14260
14261 // Grab the global __cpu_model.
14262 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14263 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14264
14265 // Grab the first (0th) element from the field __cpu_features off of the
14266 // global in the struct STy.
14267 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14268 Builder.getInt32(0)};
14269 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14270 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14271 CharUnits::fromQuantity(4));
14272
14273 // Check the value of the bit corresponding to the feature requested.
14274 Value *Mask = Builder.getInt32(FeatureMask[0]);
14275 Value *Bitset = Builder.CreateAnd(Features, Mask);
14276 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14277 Result = Builder.CreateAnd(Result, Cmp);
14278 }
14279
14280 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14281 llvm::Constant *CpuFeatures2 =
14282 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14283 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14284 for (int i = 1; i != 4; ++i) {
14285 const uint32_t M = FeatureMask[i];
14286 if (!M)
14287 continue;
14288 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14289 Value *Features = Builder.CreateAlignedLoad(
14290 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14291 CharUnits::fromQuantity(4));
14292 // Check the value of the bit corresponding to the feature requested.
14293 Value *Mask = Builder.getInt32(M);
14294 Value *Bitset = Builder.CreateAnd(Features, Mask);
14295 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14296 Result = Builder.CreateAnd(Result, Cmp);
14297 }
14298
14299 return Result;
14300}
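// A single feature in the first mask word roughly expands to a load of
// __cpu_model.__cpu_features[0], an 'and' with the feature bit, and an
// 'icmp eq' against that same bit; bits in the remaining three mask words are
// tested against the __cpu_features2 array in the same way.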
14301
14302Value *CodeGenFunction::EmitAArch64CpuInit() {
14303 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14304 llvm::FunctionCallee Func =
14305 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14306 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14307 cast<llvm::GlobalValue>(Func.getCallee())
14308 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14309 return Builder.CreateCall(Func);
14310}
14311
14312Value *CodeGenFunction::EmitRISCVCpuInit() {
14313 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14314 llvm::FunctionCallee Func =
14315 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14316 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14317 CalleeGV->setDSOLocal(true);
14318 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14319 return Builder.CreateCall(Func);
14320}
14321
14322Value *CodeGenFunction::EmitX86CpuInit() {
14323 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14324 /*Variadic*/ false);
14325 llvm::FunctionCallee Func =
14326 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14327 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14328 cast<llvm::GlobalValue>(Func.getCallee())
14329 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14330 return Builder.CreateCall(Func);
14331}
14332
14333Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14334 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14335 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14336 llvm::SmallVector<StringRef, 8> Features;
14337 ArgStr.split(Features, "+");
14338 for (auto &Feature : Features) {
14339 Feature = Feature.trim();
14340 if (!llvm::AArch64::parseFMVExtension(Feature))
14341 return Builder.getFalse();
14342 if (Feature != "default")
14343 Features.push_back(Feature);
14344 }
14345 return EmitAArch64CpuSupports(Features);
14346}
14347
14348llvm::Value *
14349CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14350 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14351 Value *Result = Builder.getTrue();
14352 if (FeaturesMask != 0) {
14353 // Get features from structure in runtime library
14354 // struct {
14355 // unsigned long long features;
14356 // } __aarch64_cpu_features;
14357 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14358 llvm::Constant *AArch64CPUFeatures =
14359 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14360 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14361 llvm::Value *CpuFeatures = Builder.CreateGEP(
14362 STy, AArch64CPUFeatures,
14363 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14364 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14365 CharUnits::fromQuantity(8));
14366 Value *Mask = Builder.getInt64(FeaturesMask);
14367 Value *Bitset = Builder.CreateAnd(Features, Mask);
14368 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14369 Result = Builder.CreateAnd(Result, Cmp);
14370 }
14371 return Result;
14372}
14373
14374Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
14375
14376 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14377 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14378 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14379 return Builder.getFalse();
14380
14381 // Note: We are making an unchecked assumption that the size of the
14382 // feature array is >= 1. This holds for any version of compiler-rt
14383 // which defines this interface.
14384 llvm::ArrayType *ArrayOfInt64Ty = llvm::ArrayType::get(Int64Ty, 1);
14385 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14386 llvm::Constant *RISCVFeaturesBits =
14387 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
14388 auto *GV = cast<llvm::GlobalValue>(RISCVFeaturesBits);
14389 GV->setDSOLocal(true);
14390
14391 auto LoadFeatureBit = [&](unsigned Index) {
14392 // Create GEP then load.
14393 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
14394 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
14395 IndexVal};
14396 Value *Ptr =
14397 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
14398 Value *FeaturesBit =
14399 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
14400 return FeaturesBit;
14401 };
14402
14403 int BitPos = RISCVISAInfo::getRISCVFeaturesBitPosition(FeatureStr);
14404 assert(BitPos != -1 && "validation should have rejected this feature");
14405 Value *MaskV = Builder.getInt64(1ULL << BitPos);
14406 Value *Bitset = Builder.CreateAnd(LoadFeatureBit(0), MaskV);
14407 return Builder.CreateICmpEQ(Bitset, MaskV);
14408}
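// Checking a single extension roughly becomes a load of
// __riscv_feature_bits.features[0], an 'and' with (1 << bit position as
// reported by RISCVISAInfo), and an equality compare against that mask.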
14409
14410Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14411 const CallExpr *E) {
14412 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14413 return EmitX86CpuIs(E);
14414 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14415 return EmitX86CpuSupports(E);
14416 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14417 return EmitX86CpuInit();
14418
14419 // Handle MSVC intrinsics before argument evaluation to prevent double
14420 // evaluation.
14421 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14422 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14423
14424 SmallVector<Value*, 4> Ops;
14425 bool IsMaskFCmp = false;
14426 bool IsConjFMA = false;
14427
14428 // Find out if any arguments are required to be integer constant expressions.
14429 unsigned ICEArguments = 0;
14430 ASTContext::GetBuiltinTypeError Error;
14431 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14432 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14433
14434 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14435 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14436 }
14437
14438 // These exist so that the builtin that takes an immediate can be bounds
14439 // checked by clang to avoid passing bad immediates to the backend. Since
14440 // AVX has a larger immediate than SSE, we would need separate builtins to
14441 // do the different bounds checking. Rather than create a clang-specific,
14442 // SSE-only builtin, this implements eight separate builtins to match the
14443 // gcc implementation.
14444 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14445 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14446 llvm::Function *F = CGM.getIntrinsic(ID);
14447 return Builder.CreateCall(F, Ops);
14448 };
14449
14450 // For the vector forms of FP comparisons, translate the builtins directly to
14451 // IR.
14452 // TODO: The builtins could be removed if the SSE header files used vector
14453 // extension comparisons directly (vector ordered/unordered may need
14454 // additional support via __builtin_isnan()).
14455 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14456 bool IsSignaling) {
14457 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14458 Value *Cmp;
14459 if (IsSignaling)
14460 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14461 else
14462 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14463 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14464 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14465 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14466 return Builder.CreateBitCast(Sext, FPVecTy);
14467 };
14468
14469 switch (BuiltinID) {
14470 default: return nullptr;
14471 case X86::BI_mm_prefetch: {
14472 Value *Address = Ops[0];
14473 ConstantInt *C = cast<ConstantInt>(Ops[1]);
14474 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14475 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14476 Value *Data = ConstantInt::get(Int32Ty, 1);
14477 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14478 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14479 }
14480 case X86::BI_mm_clflush: {
14481 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14482 Ops[0]);
14483 }
14484 case X86::BI_mm_lfence: {
14485 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14486 }
14487 case X86::BI_mm_mfence: {
14488 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14489 }
14490 case X86::BI_mm_sfence: {
14491 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14492 }
14493 case X86::BI_mm_pause: {
14494 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14495 }
14496 case X86::BI__rdtsc: {
14497 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14498 }
14499 case X86::BI__builtin_ia32_rdtscp: {
14500 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14501 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14502 Ops[0]);
14503 return Builder.CreateExtractValue(Call, 0);
14504 }
14505 case X86::BI__builtin_ia32_lzcnt_u16:
14506 case X86::BI__builtin_ia32_lzcnt_u32:
14507 case X86::BI__builtin_ia32_lzcnt_u64: {
14508 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14509 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14510 }
14511 case X86::BI__builtin_ia32_tzcnt_u16:
14512 case X86::BI__builtin_ia32_tzcnt_u32:
14513 case X86::BI__builtin_ia32_tzcnt_u64: {
14514 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14515 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14516 }
14517 case X86::BI__builtin_ia32_undef128:
14518 case X86::BI__builtin_ia32_undef256:
14519 case X86::BI__builtin_ia32_undef512:
14520 // The x86 definition of "undef" is not the same as the LLVM definition
14521 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14522 // IR optimizer and backend.
14523 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14524 // value, we should use that here instead of a zero.
14525 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14526 case X86::BI__builtin_ia32_vec_ext_v4hi:
14527 case X86::BI__builtin_ia32_vec_ext_v16qi:
14528 case X86::BI__builtin_ia32_vec_ext_v8hi:
14529 case X86::BI__builtin_ia32_vec_ext_v4si:
14530 case X86::BI__builtin_ia32_vec_ext_v4sf:
14531 case X86::BI__builtin_ia32_vec_ext_v2di:
14532 case X86::BI__builtin_ia32_vec_ext_v32qi:
14533 case X86::BI__builtin_ia32_vec_ext_v16hi:
14534 case X86::BI__builtin_ia32_vec_ext_v8si:
14535 case X86::BI__builtin_ia32_vec_ext_v4di: {
14536 unsigned NumElts =
14537 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14538 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14539 Index &= NumElts - 1;
14540 // These builtins exist so we can ensure the index is an ICE and in range.
14541 // Otherwise we could just do this in the header file.
14542 return Builder.CreateExtractElement(Ops[0], Index);
14543 }
14544 case X86::BI__builtin_ia32_vec_set_v4hi:
14545 case X86::BI__builtin_ia32_vec_set_v16qi:
14546 case X86::BI__builtin_ia32_vec_set_v8hi:
14547 case X86::BI__builtin_ia32_vec_set_v4si:
14548 case X86::BI__builtin_ia32_vec_set_v2di:
14549 case X86::BI__builtin_ia32_vec_set_v32qi:
14550 case X86::BI__builtin_ia32_vec_set_v16hi:
14551 case X86::BI__builtin_ia32_vec_set_v8si:
14552 case X86::BI__builtin_ia32_vec_set_v4di: {
14553 unsigned NumElts =
14554 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14555 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14556 Index &= NumElts - 1;
14557 // These builtins exist so we can ensure the index is an ICE and in range.
14558 // Otherwise we could just do this in the header file.
14559 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14560 }
14561 case X86::BI_mm_setcsr:
14562 case X86::BI__builtin_ia32_ldmxcsr: {
14563 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14564 Builder.CreateStore(Ops[0], Tmp);
14565 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14566 Tmp.getPointer());
14567 }
14568 case X86::BI_mm_getcsr:
14569 case X86::BI__builtin_ia32_stmxcsr: {
14570 RawAddress Tmp = CreateMemTemp(E->getType());
14571 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14572 Tmp.getPointer());
14573 return Builder.CreateLoad(Tmp, "stmxcsr");
14574 }
14575 case X86::BI__builtin_ia32_xsave:
14576 case X86::BI__builtin_ia32_xsave64:
14577 case X86::BI__builtin_ia32_xrstor:
14578 case X86::BI__builtin_ia32_xrstor64:
14579 case X86::BI__builtin_ia32_xsaveopt:
14580 case X86::BI__builtin_ia32_xsaveopt64:
14581 case X86::BI__builtin_ia32_xrstors:
14582 case X86::BI__builtin_ia32_xrstors64:
14583 case X86::BI__builtin_ia32_xsavec:
14584 case X86::BI__builtin_ia32_xsavec64:
14585 case X86::BI__builtin_ia32_xsaves:
14586 case X86::BI__builtin_ia32_xsaves64:
14587 case X86::BI__builtin_ia32_xsetbv:
14588 case X86::BI_xsetbv: {
14589 Intrinsic::ID ID;
14590#define INTRINSIC_X86_XSAVE_ID(NAME) \
14591 case X86::BI__builtin_ia32_##NAME: \
14592 ID = Intrinsic::x86_##NAME; \
14593 break
14594 switch (BuiltinID) {
14595 default: llvm_unreachable("Unsupported intrinsic!");
14596 INTRINSIC_X86_XSAVE_ID(xsave);
14597 INTRINSIC_X86_XSAVE_ID(xsave64);
14598 INTRINSIC_X86_XSAVE_ID(xrstor);
14599 INTRINSIC_X86_XSAVE_ID(xrstor64);
14600 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14601 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14602 INTRINSIC_X86_XSAVE_ID(xrstors);
14603 INTRINSIC_X86_XSAVE_ID(xrstors64);
14604 INTRINSIC_X86_XSAVE_ID(xsavec);
14605 INTRINSIC_X86_XSAVE_ID(xsavec64);
14606 INTRINSIC_X86_XSAVE_ID(xsaves);
14607 INTRINSIC_X86_XSAVE_ID(xsaves64);
14608 INTRINSIC_X86_XSAVE_ID(xsetbv);
14609 case X86::BI_xsetbv:
14610 ID = Intrinsic::x86_xsetbv;
14611 break;
14612 }
14613#undef INTRINSIC_X86_XSAVE_ID
14614 Value *Mhi = Builder.CreateTrunc(
14615 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14616 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14617 Ops[1] = Mhi;
14618 Ops.push_back(Mlo);
14619 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14620 }
14621 case X86::BI__builtin_ia32_xgetbv:
14622 case X86::BI_xgetbv:
14623 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14624 case X86::BI__builtin_ia32_storedqudi128_mask:
14625 case X86::BI__builtin_ia32_storedqusi128_mask:
14626 case X86::BI__builtin_ia32_storedquhi128_mask:
14627 case X86::BI__builtin_ia32_storedquqi128_mask:
14628 case X86::BI__builtin_ia32_storeupd128_mask:
14629 case X86::BI__builtin_ia32_storeups128_mask:
14630 case X86::BI__builtin_ia32_storedqudi256_mask:
14631 case X86::BI__builtin_ia32_storedqusi256_mask:
14632 case X86::BI__builtin_ia32_storedquhi256_mask:
14633 case X86::BI__builtin_ia32_storedquqi256_mask:
14634 case X86::BI__builtin_ia32_storeupd256_mask:
14635 case X86::BI__builtin_ia32_storeups256_mask:
14636 case X86::BI__builtin_ia32_storedqudi512_mask:
14637 case X86::BI__builtin_ia32_storedqusi512_mask:
14638 case X86::BI__builtin_ia32_storedquhi512_mask:
14639 case X86::BI__builtin_ia32_storedquqi512_mask:
14640 case X86::BI__builtin_ia32_storeupd512_mask:
14641 case X86::BI__builtin_ia32_storeups512_mask:
14642 return EmitX86MaskedStore(*this, Ops, Align(1));
14643
14644 case X86::BI__builtin_ia32_storesh128_mask:
14645 case X86::BI__builtin_ia32_storess128_mask:
14646 case X86::BI__builtin_ia32_storesd128_mask:
14647 return EmitX86MaskedStore(*this, Ops, Align(1));
14648
14649 case X86::BI__builtin_ia32_vpopcntb_128:
14650 case X86::BI__builtin_ia32_vpopcntd_128:
14651 case X86::BI__builtin_ia32_vpopcntq_128:
14652 case X86::BI__builtin_ia32_vpopcntw_128:
14653 case X86::BI__builtin_ia32_vpopcntb_256:
14654 case X86::BI__builtin_ia32_vpopcntd_256:
14655 case X86::BI__builtin_ia32_vpopcntq_256:
14656 case X86::BI__builtin_ia32_vpopcntw_256:
14657 case X86::BI__builtin_ia32_vpopcntb_512:
14658 case X86::BI__builtin_ia32_vpopcntd_512:
14659 case X86::BI__builtin_ia32_vpopcntq_512:
14660 case X86::BI__builtin_ia32_vpopcntw_512: {
14661 llvm::Type *ResultType = ConvertType(E->getType());
14662 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14663 return Builder.CreateCall(F, Ops);
14664 }
14665 case X86::BI__builtin_ia32_cvtmask2b128:
14666 case X86::BI__builtin_ia32_cvtmask2b256:
14667 case X86::BI__builtin_ia32_cvtmask2b512:
14668 case X86::BI__builtin_ia32_cvtmask2w128:
14669 case X86::BI__builtin_ia32_cvtmask2w256:
14670 case X86::BI__builtin_ia32_cvtmask2w512:
14671 case X86::BI__builtin_ia32_cvtmask2d128:
14672 case X86::BI__builtin_ia32_cvtmask2d256:
14673 case X86::BI__builtin_ia32_cvtmask2d512:
14674 case X86::BI__builtin_ia32_cvtmask2q128:
14675 case X86::BI__builtin_ia32_cvtmask2q256:
14676 case X86::BI__builtin_ia32_cvtmask2q512:
14677 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14678
14679 case X86::BI__builtin_ia32_cvtb2mask128:
14680 case X86::BI__builtin_ia32_cvtb2mask256:
14681 case X86::BI__builtin_ia32_cvtb2mask512:
14682 case X86::BI__builtin_ia32_cvtw2mask128:
14683 case X86::BI__builtin_ia32_cvtw2mask256:
14684 case X86::BI__builtin_ia32_cvtw2mask512:
14685 case X86::BI__builtin_ia32_cvtd2mask128:
14686 case X86::BI__builtin_ia32_cvtd2mask256:
14687 case X86::BI__builtin_ia32_cvtd2mask512:
14688 case X86::BI__builtin_ia32_cvtq2mask128:
14689 case X86::BI__builtin_ia32_cvtq2mask256:
14690 case X86::BI__builtin_ia32_cvtq2mask512:
14691 return EmitX86ConvertToMask(*this, Ops[0]);
14692
14693 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14694 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14695 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14696 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14697 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14698 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14699 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14700 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14701 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14702 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14703 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14704 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14705 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14706 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14707
14708 case X86::BI__builtin_ia32_vfmaddss3:
14709 case X86::BI__builtin_ia32_vfmaddsd3:
14710 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14711 case X86::BI__builtin_ia32_vfmaddss3_mask:
14712 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14713 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14714 case X86::BI__builtin_ia32_vfmaddss:
14715 case X86::BI__builtin_ia32_vfmaddsd:
14716 return EmitScalarFMAExpr(*this, E, Ops,
14717 Constant::getNullValue(Ops[0]->getType()));
14718 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14719 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14720 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14721 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14722 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14723 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14724 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14725 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14726 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14727 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14728 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14729 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14730 /*NegAcc*/ true);
14731 case X86::BI__builtin_ia32_vfmaddph:
14732 case X86::BI__builtin_ia32_vfmaddps:
14733 case X86::BI__builtin_ia32_vfmaddpd:
14734 case X86::BI__builtin_ia32_vfmaddph256:
14735 case X86::BI__builtin_ia32_vfmaddps256:
14736 case X86::BI__builtin_ia32_vfmaddpd256:
14737 case X86::BI__builtin_ia32_vfmaddph512_mask:
14738 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14739 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14740 case X86::BI__builtin_ia32_vfmaddps512_mask:
14741 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14742 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14743 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14744 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14745 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14746 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14747 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14748 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14749 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14750 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14751 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14752 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14753 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14754 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14755 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14756 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14757 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14758 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14759 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14760 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14761 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14762 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14763
14764 case X86::BI__builtin_ia32_movdqa32store128_mask:
14765 case X86::BI__builtin_ia32_movdqa64store128_mask:
14766 case X86::BI__builtin_ia32_storeaps128_mask:
14767 case X86::BI__builtin_ia32_storeapd128_mask:
14768 case X86::BI__builtin_ia32_movdqa32store256_mask:
14769 case X86::BI__builtin_ia32_movdqa64store256_mask:
14770 case X86::BI__builtin_ia32_storeaps256_mask:
14771 case X86::BI__builtin_ia32_storeapd256_mask:
14772 case X86::BI__builtin_ia32_movdqa32store512_mask:
14773 case X86::BI__builtin_ia32_movdqa64store512_mask:
14774 case X86::BI__builtin_ia32_storeaps512_mask:
14775 case X86::BI__builtin_ia32_storeapd512_mask:
14776 return EmitX86MaskedStore(
14777 *this, Ops,
14778 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14779
14780 case X86::BI__builtin_ia32_loadups128_mask:
14781 case X86::BI__builtin_ia32_loadups256_mask:
14782 case X86::BI__builtin_ia32_loadups512_mask:
14783 case X86::BI__builtin_ia32_loadupd128_mask:
14784 case X86::BI__builtin_ia32_loadupd256_mask:
14785 case X86::BI__builtin_ia32_loadupd512_mask:
14786 case X86::BI__builtin_ia32_loaddquqi128_mask:
14787 case X86::BI__builtin_ia32_loaddquqi256_mask:
14788 case X86::BI__builtin_ia32_loaddquqi512_mask:
14789 case X86::BI__builtin_ia32_loaddquhi128_mask:
14790 case X86::BI__builtin_ia32_loaddquhi256_mask:
14791 case X86::BI__builtin_ia32_loaddquhi512_mask:
14792 case X86::BI__builtin_ia32_loaddqusi128_mask:
14793 case X86::BI__builtin_ia32_loaddqusi256_mask:
14794 case X86::BI__builtin_ia32_loaddqusi512_mask:
14795 case X86::BI__builtin_ia32_loaddqudi128_mask:
14796 case X86::BI__builtin_ia32_loaddqudi256_mask:
14797 case X86::BI__builtin_ia32_loaddqudi512_mask:
14798 return EmitX86MaskedLoad(*this, Ops, Align(1));
14799
14800 case X86::BI__builtin_ia32_loadsh128_mask:
14801 case X86::BI__builtin_ia32_loadss128_mask:
14802 case X86::BI__builtin_ia32_loadsd128_mask:
14803 return EmitX86MaskedLoad(*this, Ops, Align(1));
14804
14805 case X86::BI__builtin_ia32_loadaps128_mask:
14806 case X86::BI__builtin_ia32_loadaps256_mask:
14807 case X86::BI__builtin_ia32_loadaps512_mask:
14808 case X86::BI__builtin_ia32_loadapd128_mask:
14809 case X86::BI__builtin_ia32_loadapd256_mask:
14810 case X86::BI__builtin_ia32_loadapd512_mask:
14811 case X86::BI__builtin_ia32_movdqa32load128_mask:
14812 case X86::BI__builtin_ia32_movdqa32load256_mask:
14813 case X86::BI__builtin_ia32_movdqa32load512_mask:
14814 case X86::BI__builtin_ia32_movdqa64load128_mask:
14815 case X86::BI__builtin_ia32_movdqa64load256_mask:
14816 case X86::BI__builtin_ia32_movdqa64load512_mask:
14817 return EmitX86MaskedLoad(
14818 *this, Ops,
14819 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14820
14821 case X86::BI__builtin_ia32_expandloaddf128_mask:
14822 case X86::BI__builtin_ia32_expandloaddf256_mask:
14823 case X86::BI__builtin_ia32_expandloaddf512_mask:
14824 case X86::BI__builtin_ia32_expandloadsf128_mask:
14825 case X86::BI__builtin_ia32_expandloadsf256_mask:
14826 case X86::BI__builtin_ia32_expandloadsf512_mask:
14827 case X86::BI__builtin_ia32_expandloaddi128_mask:
14828 case X86::BI__builtin_ia32_expandloaddi256_mask:
14829 case X86::BI__builtin_ia32_expandloaddi512_mask:
14830 case X86::BI__builtin_ia32_expandloadsi128_mask:
14831 case X86::BI__builtin_ia32_expandloadsi256_mask:
14832 case X86::BI__builtin_ia32_expandloadsi512_mask:
14833 case X86::BI__builtin_ia32_expandloadhi128_mask:
14834 case X86::BI__builtin_ia32_expandloadhi256_mask:
14835 case X86::BI__builtin_ia32_expandloadhi512_mask:
14836 case X86::BI__builtin_ia32_expandloadqi128_mask:
14837 case X86::BI__builtin_ia32_expandloadqi256_mask:
14838 case X86::BI__builtin_ia32_expandloadqi512_mask:
14839 return EmitX86ExpandLoad(*this, Ops);
14840
14841 case X86::BI__builtin_ia32_compressstoredf128_mask:
14842 case X86::BI__builtin_ia32_compressstoredf256_mask:
14843 case X86::BI__builtin_ia32_compressstoredf512_mask:
14844 case X86::BI__builtin_ia32_compressstoresf128_mask:
14845 case X86::BI__builtin_ia32_compressstoresf256_mask:
14846 case X86::BI__builtin_ia32_compressstoresf512_mask:
14847 case X86::BI__builtin_ia32_compressstoredi128_mask:
14848 case X86::BI__builtin_ia32_compressstoredi256_mask:
14849 case X86::BI__builtin_ia32_compressstoredi512_mask:
14850 case X86::BI__builtin_ia32_compressstoresi128_mask:
14851 case X86::BI__builtin_ia32_compressstoresi256_mask:
14852 case X86::BI__builtin_ia32_compressstoresi512_mask:
14853 case X86::BI__builtin_ia32_compressstorehi128_mask:
14854 case X86::BI__builtin_ia32_compressstorehi256_mask:
14855 case X86::BI__builtin_ia32_compressstorehi512_mask:
14856 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14857 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14858 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14859 return EmitX86CompressStore(*this, Ops);
14860
14861 case X86::BI__builtin_ia32_expanddf128_mask:
14862 case X86::BI__builtin_ia32_expanddf256_mask:
14863 case X86::BI__builtin_ia32_expanddf512_mask:
14864 case X86::BI__builtin_ia32_expandsf128_mask:
14865 case X86::BI__builtin_ia32_expandsf256_mask:
14866 case X86::BI__builtin_ia32_expandsf512_mask:
14867 case X86::BI__builtin_ia32_expanddi128_mask:
14868 case X86::BI__builtin_ia32_expanddi256_mask:
14869 case X86::BI__builtin_ia32_expanddi512_mask:
14870 case X86::BI__builtin_ia32_expandsi128_mask:
14871 case X86::BI__builtin_ia32_expandsi256_mask:
14872 case X86::BI__builtin_ia32_expandsi512_mask:
14873 case X86::BI__builtin_ia32_expandhi128_mask:
14874 case X86::BI__builtin_ia32_expandhi256_mask:
14875 case X86::BI__builtin_ia32_expandhi512_mask:
14876 case X86::BI__builtin_ia32_expandqi128_mask:
14877 case X86::BI__builtin_ia32_expandqi256_mask:
14878 case X86::BI__builtin_ia32_expandqi512_mask:
14879 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14880
14881 case X86::BI__builtin_ia32_compressdf128_mask:
14882 case X86::BI__builtin_ia32_compressdf256_mask:
14883 case X86::BI__builtin_ia32_compressdf512_mask:
14884 case X86::BI__builtin_ia32_compresssf128_mask:
14885 case X86::BI__builtin_ia32_compresssf256_mask:
14886 case X86::BI__builtin_ia32_compresssf512_mask:
14887 case X86::BI__builtin_ia32_compressdi128_mask:
14888 case X86::BI__builtin_ia32_compressdi256_mask:
14889 case X86::BI__builtin_ia32_compressdi512_mask:
14890 case X86::BI__builtin_ia32_compresssi128_mask:
14891 case X86::BI__builtin_ia32_compresssi256_mask:
14892 case X86::BI__builtin_ia32_compresssi512_mask:
14893 case X86::BI__builtin_ia32_compresshi128_mask:
14894 case X86::BI__builtin_ia32_compresshi256_mask:
14895 case X86::BI__builtin_ia32_compresshi512_mask:
14896 case X86::BI__builtin_ia32_compressqi128_mask:
14897 case X86::BI__builtin_ia32_compressqi256_mask:
14898 case X86::BI__builtin_ia32_compressqi512_mask:
14899 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14900
14901 case X86::BI__builtin_ia32_gather3div2df:
14902 case X86::BI__builtin_ia32_gather3div2di:
14903 case X86::BI__builtin_ia32_gather3div4df:
14904 case X86::BI__builtin_ia32_gather3div4di:
14905 case X86::BI__builtin_ia32_gather3div4sf:
14906 case X86::BI__builtin_ia32_gather3div4si:
14907 case X86::BI__builtin_ia32_gather3div8sf:
14908 case X86::BI__builtin_ia32_gather3div8si:
14909 case X86::BI__builtin_ia32_gather3siv2df:
14910 case X86::BI__builtin_ia32_gather3siv2di:
14911 case X86::BI__builtin_ia32_gather3siv4df:
14912 case X86::BI__builtin_ia32_gather3siv4di:
14913 case X86::BI__builtin_ia32_gather3siv4sf:
14914 case X86::BI__builtin_ia32_gather3siv4si:
14915 case X86::BI__builtin_ia32_gather3siv8sf:
14916 case X86::BI__builtin_ia32_gather3siv8si:
14917 case X86::BI__builtin_ia32_gathersiv8df:
14918 case X86::BI__builtin_ia32_gathersiv16sf:
14919 case X86::BI__builtin_ia32_gatherdiv8df:
14920 case X86::BI__builtin_ia32_gatherdiv16sf:
14921 case X86::BI__builtin_ia32_gathersiv8di:
14922 case X86::BI__builtin_ia32_gathersiv16si:
14923 case X86::BI__builtin_ia32_gatherdiv8di:
14924 case X86::BI__builtin_ia32_gatherdiv16si: {
14925 Intrinsic::ID IID;
14926 switch (BuiltinID) {
14927 default: llvm_unreachable("Unexpected builtin");
14928 case X86::BI__builtin_ia32_gather3div2df:
14929 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14930 break;
14931 case X86::BI__builtin_ia32_gather3div2di:
14932 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14933 break;
14934 case X86::BI__builtin_ia32_gather3div4df:
14935 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14936 break;
14937 case X86::BI__builtin_ia32_gather3div4di:
14938 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14939 break;
14940 case X86::BI__builtin_ia32_gather3div4sf:
14941 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14942 break;
14943 case X86::BI__builtin_ia32_gather3div4si:
14944 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14945 break;
14946 case X86::BI__builtin_ia32_gather3div8sf:
14947 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14948 break;
14949 case X86::BI__builtin_ia32_gather3div8si:
14950 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14951 break;
14952 case X86::BI__builtin_ia32_gather3siv2df:
14953 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14954 break;
14955 case X86::BI__builtin_ia32_gather3siv2di:
14956 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14957 break;
14958 case X86::BI__builtin_ia32_gather3siv4df:
14959 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14960 break;
14961 case X86::BI__builtin_ia32_gather3siv4di:
14962 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14963 break;
14964 case X86::BI__builtin_ia32_gather3siv4sf:
14965 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14966 break;
14967 case X86::BI__builtin_ia32_gather3siv4si:
14968 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14969 break;
14970 case X86::BI__builtin_ia32_gather3siv8sf:
14971 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14972 break;
14973 case X86::BI__builtin_ia32_gather3siv8si:
14974 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14975 break;
14976 case X86::BI__builtin_ia32_gathersiv8df:
14977 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14978 break;
14979 case X86::BI__builtin_ia32_gathersiv16sf:
14980 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14981 break;
14982 case X86::BI__builtin_ia32_gatherdiv8df:
14983 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14984 break;
14985 case X86::BI__builtin_ia32_gatherdiv16sf:
14986 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14987 break;
14988 case X86::BI__builtin_ia32_gathersiv8di:
14989 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14990 break;
14991 case X86::BI__builtin_ia32_gathersiv16si:
14992 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14993 break;
14994 case X86::BI__builtin_ia32_gatherdiv8di:
14995 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14996 break;
14997 case X86::BI__builtin_ia32_gatherdiv16si:
14998 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14999 break;
15000 }
15001
15002 unsigned MinElts = std::min(
15003 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15004 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15005 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15006 Function *Intr = CGM.getIntrinsic(IID);
15007 return Builder.CreateCall(Intr, Ops);
15008 }
15009
15010 case X86::BI__builtin_ia32_scattersiv8df:
15011 case X86::BI__builtin_ia32_scattersiv16sf:
15012 case X86::BI__builtin_ia32_scatterdiv8df:
15013 case X86::BI__builtin_ia32_scatterdiv16sf:
15014 case X86::BI__builtin_ia32_scattersiv8di:
15015 case X86::BI__builtin_ia32_scattersiv16si:
15016 case X86::BI__builtin_ia32_scatterdiv8di:
15017 case X86::BI__builtin_ia32_scatterdiv16si:
15018 case X86::BI__builtin_ia32_scatterdiv2df:
15019 case X86::BI__builtin_ia32_scatterdiv2di:
15020 case X86::BI__builtin_ia32_scatterdiv4df:
15021 case X86::BI__builtin_ia32_scatterdiv4di:
15022 case X86::BI__builtin_ia32_scatterdiv4sf:
15023 case X86::BI__builtin_ia32_scatterdiv4si:
15024 case X86::BI__builtin_ia32_scatterdiv8sf:
15025 case X86::BI__builtin_ia32_scatterdiv8si:
15026 case X86::BI__builtin_ia32_scattersiv2df:
15027 case X86::BI__builtin_ia32_scattersiv2di:
15028 case X86::BI__builtin_ia32_scattersiv4df:
15029 case X86::BI__builtin_ia32_scattersiv4di:
15030 case X86::BI__builtin_ia32_scattersiv4sf:
15031 case X86::BI__builtin_ia32_scattersiv4si:
15032 case X86::BI__builtin_ia32_scattersiv8sf:
15033 case X86::BI__builtin_ia32_scattersiv8si: {
15034 Intrinsic::ID IID;
15035 switch (BuiltinID) {
15036 default: llvm_unreachable("Unexpected builtin");
15037 case X86::BI__builtin_ia32_scattersiv8df:
15038 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15039 break;
15040 case X86::BI__builtin_ia32_scattersiv16sf:
15041 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15042 break;
15043 case X86::BI__builtin_ia32_scatterdiv8df:
15044 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15045 break;
15046 case X86::BI__builtin_ia32_scatterdiv16sf:
15047 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15048 break;
15049 case X86::BI__builtin_ia32_scattersiv8di:
15050 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15051 break;
15052 case X86::BI__builtin_ia32_scattersiv16si:
15053 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15054 break;
15055 case X86::BI__builtin_ia32_scatterdiv8di:
15056 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15057 break;
15058 case X86::BI__builtin_ia32_scatterdiv16si:
15059 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15060 break;
15061 case X86::BI__builtin_ia32_scatterdiv2df:
15062 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15063 break;
15064 case X86::BI__builtin_ia32_scatterdiv2di:
15065 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15066 break;
15067 case X86::BI__builtin_ia32_scatterdiv4df:
15068 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15069 break;
15070 case X86::BI__builtin_ia32_scatterdiv4di:
15071 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15072 break;
15073 case X86::BI__builtin_ia32_scatterdiv4sf:
15074 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15075 break;
15076 case X86::BI__builtin_ia32_scatterdiv4si:
15077 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15078 break;
15079 case X86::BI__builtin_ia32_scatterdiv8sf:
15080 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15081 break;
15082 case X86::BI__builtin_ia32_scatterdiv8si:
15083 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15084 break;
15085 case X86::BI__builtin_ia32_scattersiv2df:
15086 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15087 break;
15088 case X86::BI__builtin_ia32_scattersiv2di:
15089 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15090 break;
15091 case X86::BI__builtin_ia32_scattersiv4df:
15092 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15093 break;
15094 case X86::BI__builtin_ia32_scattersiv4di:
15095 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15096 break;
15097 case X86::BI__builtin_ia32_scattersiv4sf:
15098 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15099 break;
15100 case X86::BI__builtin_ia32_scattersiv4si:
15101 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15102 break;
15103 case X86::BI__builtin_ia32_scattersiv8sf:
15104 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15105 break;
15106 case X86::BI__builtin_ia32_scattersiv8si:
15107 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15108 break;
15109 }
15110
15111 unsigned MinElts = std::min(
15112 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15113 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15114 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15115 Function *Intr = CGM.getIntrinsic(IID);
15116 return Builder.CreateCall(Intr, Ops);
15117 }
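// As with the gathers above, the scatter mask width is the smaller of the
// index (Ops[2]) and source (Ops[3]) element counts; e.g. (illustrative) for
// __builtin_ia32_scattersiv8df both vectors have 8 elements, so the i8 in
// Ops[1] is converted to an <8 x i1> mask before the intrinsic call.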
15118
15119 case X86::BI__builtin_ia32_vextractf128_pd256:
15120 case X86::BI__builtin_ia32_vextractf128_ps256:
15121 case X86::BI__builtin_ia32_vextractf128_si256:
15122 case X86::BI__builtin_ia32_extract128i256:
15123 case X86::BI__builtin_ia32_extractf64x4_mask:
15124 case X86::BI__builtin_ia32_extractf32x4_mask:
15125 case X86::BI__builtin_ia32_extracti64x4_mask:
15126 case X86::BI__builtin_ia32_extracti32x4_mask:
15127 case X86::BI__builtin_ia32_extractf32x8_mask:
15128 case X86::BI__builtin_ia32_extracti32x8_mask:
15129 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15130 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15131 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15132 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15133 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15134 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15135 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15136 unsigned NumElts = DstTy->getNumElements();
15137 unsigned SrcNumElts =
15138 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15139 unsigned SubVectors = SrcNumElts / NumElts;
15140 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15141 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15142 Index &= SubVectors - 1; // Remove any extra bits.
15143 Index *= NumElts;
15144
15145 int Indices[16];
15146 for (unsigned i = 0; i != NumElts; ++i)
15147 Indices[i] = i + Index;
15148
15149 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15150 "extract");
15151
15152 if (Ops.size() == 4)
15153 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15154
15155 return Res;
15156 }
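// Worked example (illustrative, not in the original file): for
// __builtin_ia32_vextractf128_ps256 with an immediate of 1, the source has
// 8 elements and the destination 4, so SubVectors = 2, Index becomes 4 and
// the result is a plain subvector shuffle, roughly
//   %extract = shufflevector <8 x float> %a, <8 x float> poison,
//                            <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// The *_mask forms additionally blend via EmitX86Select, with Ops[3] as the
// mask and Ops[2] as the passthrough value.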
15157 case X86::BI__builtin_ia32_vinsertf128_pd256:
15158 case X86::BI__builtin_ia32_vinsertf128_ps256:
15159 case X86::BI__builtin_ia32_vinsertf128_si256:
15160 case X86::BI__builtin_ia32_insert128i256:
15161 case X86::BI__builtin_ia32_insertf64x4:
15162 case X86::BI__builtin_ia32_insertf32x4:
15163 case X86::BI__builtin_ia32_inserti64x4:
15164 case X86::BI__builtin_ia32_inserti32x4:
15165 case X86::BI__builtin_ia32_insertf32x8:
15166 case X86::BI__builtin_ia32_inserti32x8:
15167 case X86::BI__builtin_ia32_insertf32x4_256:
15168 case X86::BI__builtin_ia32_inserti32x4_256:
15169 case X86::BI__builtin_ia32_insertf64x2_256:
15170 case X86::BI__builtin_ia32_inserti64x2_256:
15171 case X86::BI__builtin_ia32_insertf64x2_512:
15172 case X86::BI__builtin_ia32_inserti64x2_512: {
15173 unsigned DstNumElts =
15174 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15175 unsigned SrcNumElts =
15176 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15177 unsigned SubVectors = DstNumElts / SrcNumElts;
15178 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15179 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15180 Index &= SubVectors - 1; // Remove any extra bits.
15181 Index *= SrcNumElts;
15182
15183 int Indices[16];
15184 for (unsigned i = 0; i != DstNumElts; ++i)
15185 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15186
15187 Value *Op1 = Builder.CreateShuffleVector(
15188 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15189
15190 for (unsigned i = 0; i != DstNumElts; ++i) {
15191 if (i >= Index && i < (Index + SrcNumElts))
15192 Indices[i] = (i - Index) + DstNumElts;
15193 else
15194 Indices[i] = i;
15195 }
15196
15197 return Builder.CreateShuffleVector(Ops[0], Op1,
15198 ArrayRef(Indices, DstNumElts), "insert");
15199 }
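// Worked example (illustrative): __builtin_ia32_vinsertf128_ps256 with an
// immediate of 1 first widens the <4 x float> subvector to 8 lanes (the top
// lanes are don't-care), then selects it into the upper half of Ops[0]:
//   %widen  = shufflevector <4 x float> %b, <4 x float> poison,
//               <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
//   %insert = shufflevector <8 x float> %a, <8 x float> %widen,
//               <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>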
15200 case X86::BI__builtin_ia32_pmovqd512_mask:
15201 case X86::BI__builtin_ia32_pmovwb512_mask: {
15202 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15203 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15204 }
15205 case X86::BI__builtin_ia32_pmovdb512_mask:
15206 case X86::BI__builtin_ia32_pmovdw512_mask:
15207 case X86::BI__builtin_ia32_pmovqw512_mask: {
15208 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15209 if (C->isAllOnesValue())
15210 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15211
15212 Intrinsic::ID IID;
15213 switch (BuiltinID) {
15214 default: llvm_unreachable("Unsupported intrinsic!");
15215 case X86::BI__builtin_ia32_pmovdb512_mask:
15216 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15217 break;
15218 case X86::BI__builtin_ia32_pmovdw512_mask:
15219 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15220 break;
15221 case X86::BI__builtin_ia32_pmovqw512_mask:
15222 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15223 break;
15224 }
15225
15226 Function *Intr = CGM.getIntrinsic(IID);
15227 return Builder.CreateCall(Intr, Ops);
15228 }
15229 case X86::BI__builtin_ia32_pblendw128:
15230 case X86::BI__builtin_ia32_blendpd:
15231 case X86::BI__builtin_ia32_blendps:
15232 case X86::BI__builtin_ia32_blendpd256:
15233 case X86::BI__builtin_ia32_blendps256:
15234 case X86::BI__builtin_ia32_pblendw256:
15235 case X86::BI__builtin_ia32_pblendd128:
15236 case X86::BI__builtin_ia32_pblendd256: {
15237 unsigned NumElts =
15238 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15239 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15240
15241 int Indices[16];
15242 // If there are more than 8 elements, the immediate is used twice so make
15243 // sure we handle that.
15244 for (unsigned i = 0; i != NumElts; ++i)
15245 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15246
15247 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15248 ArrayRef(Indices, NumElts), "blend");
15249 }
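// Worked example (illustrative): _mm_blend_ps(a, b, 0b0101) arrives here as
// __builtin_ia32_blendps with Imm = 5; a set bit i selects lane i from the
// second operand, giving indices {4, 1, 6, 3}:
//   %blend = shufflevector <4 x float> %a, <4 x float> %b,
//                          <4 x i32> <i32 4, i32 1, i32 6, i32 3>
// The (i % 8) above is what reuses the 8-bit immediate for lanes 8-15 of the
// 16 x i16 blends.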
15250 case X86::BI__builtin_ia32_pshuflw:
15251 case X86::BI__builtin_ia32_pshuflw256:
15252 case X86::BI__builtin_ia32_pshuflw512: {
15253 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15254 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15255 unsigned NumElts = Ty->getNumElements();
15256
15257 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15258 Imm = (Imm & 0xff) * 0x01010101;
15259
15260 int Indices[32];
15261 for (unsigned l = 0; l != NumElts; l += 8) {
15262 for (unsigned i = 0; i != 4; ++i) {
15263 Indices[l + i] = l + (Imm & 3);
15264 Imm >>= 2;
15265 }
15266 for (unsigned i = 4; i != 8; ++i)
15267 Indices[l + i] = l + i;
15268 }
15269
15270 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15271 "pshuflw");
15272 }
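// Worked example (illustrative): _mm_shufflelo_epi16(a, 0x1B) arrives as
// __builtin_ia32_pshuflw; the four 2-bit fields of 0x1B give indices
// {3, 2, 1, 0, 4, 5, 6, 7} per 128-bit lane, i.e. the low four words are
// reversed and the high four are copied through unchanged.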
15273 case X86::BI__builtin_ia32_pshufhw:
15274 case X86::BI__builtin_ia32_pshufhw256:
15275 case X86::BI__builtin_ia32_pshufhw512: {
15276 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15277 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15278 unsigned NumElts = Ty->getNumElements();
15279
15280 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15281 Imm = (Imm & 0xff) * 0x01010101;
15282
15283 int Indices[32];
15284 for (unsigned l = 0; l != NumElts; l += 8) {
15285 for (unsigned i = 0; i != 4; ++i)
15286 Indices[l + i] = l + i;
15287 for (unsigned i = 4; i != 8; ++i) {
15288 Indices[l + i] = l + 4 + (Imm & 3);
15289 Imm >>= 2;
15290 }
15291 }
15292
15293 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15294 "pshufhw");
15295 }
15296 case X86::BI__builtin_ia32_pshufd:
15297 case X86::BI__builtin_ia32_pshufd256:
15298 case X86::BI__builtin_ia32_pshufd512:
15299 case X86::BI__builtin_ia32_vpermilpd:
15300 case X86::BI__builtin_ia32_vpermilps:
15301 case X86::BI__builtin_ia32_vpermilpd256:
15302 case X86::BI__builtin_ia32_vpermilps256:
15303 case X86::BI__builtin_ia32_vpermilpd512:
15304 case X86::BI__builtin_ia32_vpermilps512: {
15305 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15306 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15307 unsigned NumElts = Ty->getNumElements();
15308 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15309 unsigned NumLaneElts = NumElts / NumLanes;
15310
15311 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15312 Imm = (Imm & 0xff) * 0x01010101;
15313
15314 int Indices[16];
15315 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15316 for (unsigned i = 0; i != NumLaneElts; ++i) {
15317 Indices[i + l] = (Imm % NumLaneElts) + l;
15318 Imm /= NumLaneElts;
15319 }
15320 }
15321
15322 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15323 "permil");
15324 }
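// Worked example (illustrative): __builtin_ia32_pshufd with Imm = 0x1B on a
// <4 x i32> has NumLaneElts = 4 and produces indices {3, 2, 1, 0}, a full
// lane reversal.  The wider forms re-apply the same immediate to every
// 128-bit lane (offset by l), and vpermilpd effectively consumes one bit per
// element because NumLaneElts is 2 for 64-bit elements.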
15325 case X86::BI__builtin_ia32_shufpd:
15326 case X86::BI__builtin_ia32_shufpd256:
15327 case X86::BI__builtin_ia32_shufpd512:
15328 case X86::BI__builtin_ia32_shufps:
15329 case X86::BI__builtin_ia32_shufps256:
15330 case X86::BI__builtin_ia32_shufps512: {
15331 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15332 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15333 unsigned NumElts = Ty->getNumElements();
15334 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15335 unsigned NumLaneElts = NumElts / NumLanes;
15336
15337 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15338 Imm = (Imm & 0xff) * 0x01010101;
15339
15340 int Indices[16];
15341 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15342 for (unsigned i = 0; i != NumLaneElts; ++i) {
15343 unsigned Index = Imm % NumLaneElts;
15344 Imm /= NumLaneElts;
15345 if (i >= (NumLaneElts / 2))
15346 Index += NumElts;
15347 Indices[l + i] = l + Index;
15348 }
15349 }
15350
15351 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15352 ArrayRef(Indices, NumElts), "shufp");
15353 }
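// Worked example (illustrative): _mm_shuffle_ps(a, b, 0x1B) becomes indices
// {3, 2, 5, 4}: the lower half of each lane is taken from Ops[0] and the
// upper half (i >= NumLaneElts / 2) from Ops[1], which is why NumElts is
// added to those indices:
//   %shufp = shufflevector <4 x float> %a, <4 x float> %b,
//                          <4 x i32> <i32 3, i32 2, i32 5, i32 4>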
15354 case X86::BI__builtin_ia32_permdi256:
15355 case X86::BI__builtin_ia32_permdf256:
15356 case X86::BI__builtin_ia32_permdi512:
15357 case X86::BI__builtin_ia32_permdf512: {
15358 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15359 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15360 unsigned NumElts = Ty->getNumElements();
15361
15362 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15363 int Indices[8];
15364 for (unsigned l = 0; l != NumElts; l += 4)
15365 for (unsigned i = 0; i != 4; ++i)
15366 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15367
15368 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15369 "perm");
15370 }
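// Worked example (illustrative): _mm256_permute4x64_epi64(a, 0x1B) reaches
// here as __builtin_ia32_permdi256; the four 2-bit fields give indices
// {3, 2, 1, 0}, and the 512-bit forms repeat that pattern independently in
// each group of four 64-bit elements.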
15371 case X86::BI__builtin_ia32_palignr128:
15372 case X86::BI__builtin_ia32_palignr256:
15373 case X86::BI__builtin_ia32_palignr512: {
15374 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15375
15376 unsigned NumElts =
15377 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15378 assert(NumElts % 16 == 0);
15379
15380 // If palignr is shifting the pair of vectors more than the size of two
15381 // lanes, emit zero.
15382 if (ShiftVal >= 32)
15383 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15384
15385 // If palignr is shifting the pair of input vectors more than one lane,
15386 // but less than two lanes, convert to shifting in zeroes.
15387 if (ShiftVal > 16) {
15388 ShiftVal -= 16;
15389 Ops[1] = Ops[0];
15390 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15391 }
15392
15393 int Indices[64];
15394 // 256/512-bit palignr operates on 128-bit lanes so we need to handle that.
15395 for (unsigned l = 0; l != NumElts; l += 16) {
15396 for (unsigned i = 0; i != 16; ++i) {
15397 unsigned Idx = ShiftVal + i;
15398 if (Idx >= 16)
15399 Idx += NumElts - 16; // End of lane, switch operand.
15400 Indices[l + i] = Idx + l;
15401 }
15402 }
15403
15404 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15405 ArrayRef(Indices, NumElts), "palignr");
15406 }
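// Worked example (illustrative): _mm_alignr_epi8(a, b, 4) concatenates a:b
// and shifts right by four bytes; here that is a shuffle of (Ops[1], Ops[0])
// = (b, a) with indices 4..19, where indices >= 16 pick up the low bytes of
// a.  Shift amounts above 16 shift in zeroes instead, and 32 or more yield
// an all-zero result, matching the hardware behaviour.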
15407 case X86::BI__builtin_ia32_alignd128:
15408 case X86::BI__builtin_ia32_alignd256:
15409 case X86::BI__builtin_ia32_alignd512:
15410 case X86::BI__builtin_ia32_alignq128:
15411 case X86::BI__builtin_ia32_alignq256:
15412 case X86::BI__builtin_ia32_alignq512: {
15413 unsigned NumElts =
15414 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15415 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15416
15417 // Mask the shift amount to width of a vector.
15418 ShiftVal &= NumElts - 1;
15419
15420 int Indices[16];
15421 for (unsigned i = 0; i != NumElts; ++i)
15422 Indices[i] = i + ShiftVal;
15423
15424 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15425 ArrayRef(Indices, NumElts), "valign");
15426 }
15427 case X86::BI__builtin_ia32_shuf_f32x4_256:
15428 case X86::BI__builtin_ia32_shuf_f64x2_256:
15429 case X86::BI__builtin_ia32_shuf_i32x4_256:
15430 case X86::BI__builtin_ia32_shuf_i64x2_256:
15431 case X86::BI__builtin_ia32_shuf_f32x4:
15432 case X86::BI__builtin_ia32_shuf_f64x2:
15433 case X86::BI__builtin_ia32_shuf_i32x4:
15434 case X86::BI__builtin_ia32_shuf_i64x2: {
15435 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15436 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15437 unsigned NumElts = Ty->getNumElements();
15438 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15439 unsigned NumLaneElts = NumElts / NumLanes;
15440
15441 int Indices[16];
15442 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15443 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15444 Imm /= NumLanes; // Discard the bits we just used.
15445 if (l >= (NumElts / 2))
15446 Index += NumElts; // Switch to other source.
15447 for (unsigned i = 0; i != NumLaneElts; ++i) {
15448 Indices[l + i] = Index + i;
15449 }
15450 }
15451
15452 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15453 ArrayRef(Indices, NumElts), "shuf");
15454 }
15455
15456 case X86::BI__builtin_ia32_vperm2f128_pd256:
15457 case X86::BI__builtin_ia32_vperm2f128_ps256:
15458 case X86::BI__builtin_ia32_vperm2f128_si256:
15459 case X86::BI__builtin_ia32_permti256: {
15460 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15461 unsigned NumElts =
15462 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15463
15464 // This takes a very simple approach since there are two lanes and a
15465 // shuffle can have 2 inputs. So we reserve the first input for the first
15466 // lane and the second input for the second lane. This may result in
15467 // duplicate sources, but this can be dealt with in the backend.
15468
15469 Value *OutOps[2];
15470 int Indices[8];
15471 for (unsigned l = 0; l != 2; ++l) {
15472 // Determine the source for this lane.
15473 if (Imm & (1 << ((l * 4) + 3)))
15474 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15475 else if (Imm & (1 << ((l * 4) + 1)))
15476 OutOps[l] = Ops[1];
15477 else
15478 OutOps[l] = Ops[0];
15479
15480 for (unsigned i = 0; i != NumElts/2; ++i) {
15481 // Start with ith element of the source for this lane.
15482 unsigned Idx = (l * NumElts) + i;
15483 // If bit 0 of the immediate half is set, switch to the high half of
15484 // the source.
15485 if (Imm & (1 << (l * 4)))
15486 Idx += NumElts/2;
15487 Indices[(l * (NumElts/2)) + i] = Idx;
15488 }
15489 }
15490
15491 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15492 ArrayRef(Indices, NumElts), "vperm");
15493 }
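// Worked example (illustrative): __builtin_ia32_vperm2f128_ps256(a, b, 0x21):
// for each 128-bit half of the result, bit 3 of its immediate nibble zeroes
// the lane, bit 1 selects b instead of a, and bit 0 selects the high half of
// the chosen source.  With 0x21 the low result lane is the high half of a
// (nibble 0x1) and the high result lane is the low half of b (nibble 0x2).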
15494
15495 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15496 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15497 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15498 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15499 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15500 // Builtin type is vXi64 so multiply by 8 to get bytes.
15501 unsigned NumElts = ResultType->getNumElements() * 8;
15502
15503 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15504 if (ShiftVal >= 16)
15505 return llvm::Constant::getNullValue(ResultType);
15506
15507 int Indices[64];
15508 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
15509 for (unsigned l = 0; l != NumElts; l += 16) {
15510 for (unsigned i = 0; i != 16; ++i) {
15511 unsigned Idx = NumElts + i - ShiftVal;
15512 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15513 Indices[l + i] = Idx + l;
15514 }
15515 }
15516
15517 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15518 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15519 Value *Zero = llvm::Constant::getNullValue(VecTy);
15520 Value *SV = Builder.CreateShuffleVector(
15521 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15522 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15523 }
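// Worked example (illustrative): _mm_slli_si128(a, 4) maps to
// __builtin_ia32_pslldqi128_byteshift and becomes a byte shuffle against an
// all-zero vector; with ShiftVal = 4 the indices are 12..27 over (zero, a),
// i.e. four zero bytes followed by bytes 0..11 of a, roughly:
//   %cast   = bitcast <2 x i64> %a to <16 x i8>
//   %pslldq = shufflevector <16 x i8> zeroinitializer, <16 x i8> %cast,
//                           <16 x i32> <i32 12, ..., i32 27>
//   %res    = bitcast <16 x i8> %pslldq to <2 x i64>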
15524 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15525 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15526 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15527 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15528 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15529 // Builtin type is vXi64 so multiply by 8 to get bytes.
15530 unsigned NumElts = ResultType->getNumElements() * 8;
15531
15532 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15533 if (ShiftVal >= 16)
15534 return llvm::Constant::getNullValue(ResultType);
15535
15536 int Indices[64];
15537 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
15538 for (unsigned l = 0; l != NumElts; l += 16) {
15539 for (unsigned i = 0; i != 16; ++i) {
15540 unsigned Idx = i + ShiftVal;
15541 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15542 Indices[l + i] = Idx + l;
15543 }
15544 }
15545
15546 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15547 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15548 Value *Zero = llvm::Constant::getNullValue(VecTy);
15549 Value *SV = Builder.CreateShuffleVector(
15550 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15551 return Builder.CreateBitCast(SV, ResultType, "cast");
15552 }
15553 case X86::BI__builtin_ia32_kshiftliqi:
15554 case X86::BI__builtin_ia32_kshiftlihi:
15555 case X86::BI__builtin_ia32_kshiftlisi:
15556 case X86::BI__builtin_ia32_kshiftlidi: {
15557 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15558 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15559
15560 if (ShiftVal >= NumElts)
15561 return llvm::Constant::getNullValue(Ops[0]->getType());
15562
15563 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15564
15565 int Indices[64];
15566 for (unsigned i = 0; i != NumElts; ++i)
15567 Indices[i] = NumElts + i - ShiftVal;
15568
15569 Value *Zero = llvm::Constant::getNullValue(In->getType());
15570 Value *SV = Builder.CreateShuffleVector(
15571 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15572 return Builder.CreateBitCast(SV, Ops[0]->getType());
15573 }
15574 case X86::BI__builtin_ia32_kshiftriqi:
15575 case X86::BI__builtin_ia32_kshiftrihi:
15576 case X86::BI__builtin_ia32_kshiftrisi:
15577 case X86::BI__builtin_ia32_kshiftridi: {
15578 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15579 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15580
15581 if (ShiftVal >= NumElts)
15582 return llvm::Constant::getNullValue(Ops[0]->getType());
15583
15584 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15585
15586 int Indices[64];
15587 for (unsigned i = 0; i != NumElts; ++i)
15588 Indices[i] = i + ShiftVal;
15589
15590 Value *Zero = llvm::Constant::getNullValue(In->getType());
15591 Value *SV = Builder.CreateShuffleVector(
15592 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15593 return Builder.CreateBitCast(SV, Ops[0]->getType());
15594 }
15595 case X86::BI__builtin_ia32_movnti:
15596 case X86::BI__builtin_ia32_movnti64:
15597 case X86::BI__builtin_ia32_movntsd:
15598 case X86::BI__builtin_ia32_movntss: {
15599 llvm::MDNode *Node = llvm::MDNode::get(
15600 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15601
15602 Value *Ptr = Ops[0];
15603 Value *Src = Ops[1];
15604
15605 // Extract the 0'th element of the source vector.
15606 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15607 BuiltinID == X86::BI__builtin_ia32_movntss)
15608 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15609
15610 // Unaligned nontemporal store of the scalar value.
15611 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15612 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15613 SI->setAlignment(llvm::Align(1));
15614 return SI;
15615 }
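// Illustrative IR (assumption): _mm_stream_si32(p, v) reaches here as
// __builtin_ia32_movnti and becomes an align-1 store tagged with
// !nontemporal metadata:
//   store i32 %v, ptr %p, align 1, !nontemporal !n
// where !n is the MDNode holding the constant i32 1 built above.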
15616 // Rotate is a special case of funnel shift - the first two args are the same.
15617 case X86::BI__builtin_ia32_vprotb:
15618 case X86::BI__builtin_ia32_vprotw:
15619 case X86::BI__builtin_ia32_vprotd:
15620 case X86::BI__builtin_ia32_vprotq:
15621 case X86::BI__builtin_ia32_vprotbi:
15622 case X86::BI__builtin_ia32_vprotwi:
15623 case X86::BI__builtin_ia32_vprotdi:
15624 case X86::BI__builtin_ia32_vprotqi:
15625 case X86::BI__builtin_ia32_prold128:
15626 case X86::BI__builtin_ia32_prold256:
15627 case X86::BI__builtin_ia32_prold512:
15628 case X86::BI__builtin_ia32_prolq128:
15629 case X86::BI__builtin_ia32_prolq256:
15630 case X86::BI__builtin_ia32_prolq512:
15631 case X86::BI__builtin_ia32_prolvd128:
15632 case X86::BI__builtin_ia32_prolvd256:
15633 case X86::BI__builtin_ia32_prolvd512:
15634 case X86::BI__builtin_ia32_prolvq128:
15635 case X86::BI__builtin_ia32_prolvq256:
15636 case X86::BI__builtin_ia32_prolvq512:
15637 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15638 case X86::BI__builtin_ia32_prord128:
15639 case X86::BI__builtin_ia32_prord256:
15640 case X86::BI__builtin_ia32_prord512:
15641 case X86::BI__builtin_ia32_prorq128:
15642 case X86::BI__builtin_ia32_prorq256:
15643 case X86::BI__builtin_ia32_prorq512:
15644 case X86::BI__builtin_ia32_prorvd128:
15645 case X86::BI__builtin_ia32_prorvd256:
15646 case X86::BI__builtin_ia32_prorvd512:
15647 case X86::BI__builtin_ia32_prorvq128:
15648 case X86::BI__builtin_ia32_prorvq256:
15649 case X86::BI__builtin_ia32_prorvq512:
15650 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
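// Illustrative lowering (assumption): rotates reuse the generic funnel-shift
// intrinsics with both data operands equal, so __builtin_ia32_prold512(a, 5)
// becomes roughly
//   %r = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a, <16 x i32> %a,
//                                          <16 x i32> %amt.splat)
// and the pror* forms use @llvm.fshr.* instead; scalar shift amounts are
// splatted to the vector type by EmitX86FunnelShift.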
15651 case X86::BI__builtin_ia32_selectb_128:
15652 case X86::BI__builtin_ia32_selectb_256:
15653 case X86::BI__builtin_ia32_selectb_512:
15654 case X86::BI__builtin_ia32_selectw_128:
15655 case X86::BI__builtin_ia32_selectw_256:
15656 case X86::BI__builtin_ia32_selectw_512:
15657 case X86::BI__builtin_ia32_selectd_128:
15658 case X86::BI__builtin_ia32_selectd_256:
15659 case X86::BI__builtin_ia32_selectd_512:
15660 case X86::BI__builtin_ia32_selectq_128:
15661 case X86::BI__builtin_ia32_selectq_256:
15662 case X86::BI__builtin_ia32_selectq_512:
15663 case X86::BI__builtin_ia32_selectph_128:
15664 case X86::BI__builtin_ia32_selectph_256:
15665 case X86::BI__builtin_ia32_selectph_512:
15666 case X86::BI__builtin_ia32_selectpbf_128:
15667 case X86::BI__builtin_ia32_selectpbf_256:
15668 case X86::BI__builtin_ia32_selectpbf_512:
15669 case X86::BI__builtin_ia32_selectps_128:
15670 case X86::BI__builtin_ia32_selectps_256:
15671 case X86::BI__builtin_ia32_selectps_512:
15672 case X86::BI__builtin_ia32_selectpd_128:
15673 case X86::BI__builtin_ia32_selectpd_256:
15674 case X86::BI__builtin_ia32_selectpd_512:
15675 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15676 case X86::BI__builtin_ia32_selectsh_128:
15677 case X86::BI__builtin_ia32_selectsbf_128:
15678 case X86::BI__builtin_ia32_selectss_128:
15679 case X86::BI__builtin_ia32_selectsd_128: {
15680 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15681 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15682 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15683 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15684 }
15685 case X86::BI__builtin_ia32_cmpb128_mask:
15686 case X86::BI__builtin_ia32_cmpb256_mask:
15687 case X86::BI__builtin_ia32_cmpb512_mask:
15688 case X86::BI__builtin_ia32_cmpw128_mask:
15689 case X86::BI__builtin_ia32_cmpw256_mask:
15690 case X86::BI__builtin_ia32_cmpw512_mask:
15691 case X86::BI__builtin_ia32_cmpd128_mask:
15692 case X86::BI__builtin_ia32_cmpd256_mask:
15693 case X86::BI__builtin_ia32_cmpd512_mask:
15694 case X86::BI__builtin_ia32_cmpq128_mask:
15695 case X86::BI__builtin_ia32_cmpq256_mask:
15696 case X86::BI__builtin_ia32_cmpq512_mask: {
15697 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15698 return EmitX86MaskedCompare(*this, CC, true, Ops);
15699 }
15700 case X86::BI__builtin_ia32_ucmpb128_mask:
15701 case X86::BI__builtin_ia32_ucmpb256_mask:
15702 case X86::BI__builtin_ia32_ucmpb512_mask:
15703 case X86::BI__builtin_ia32_ucmpw128_mask:
15704 case X86::BI__builtin_ia32_ucmpw256_mask:
15705 case X86::BI__builtin_ia32_ucmpw512_mask:
15706 case X86::BI__builtin_ia32_ucmpd128_mask:
15707 case X86::BI__builtin_ia32_ucmpd256_mask:
15708 case X86::BI__builtin_ia32_ucmpd512_mask:
15709 case X86::BI__builtin_ia32_ucmpq128_mask:
15710 case X86::BI__builtin_ia32_ucmpq256_mask:
15711 case X86::BI__builtin_ia32_ucmpq512_mask: {
15712 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15713 return EmitX86MaskedCompare(*this, CC, false, Ops);
15714 }
15715 case X86::BI__builtin_ia32_vpcomb:
15716 case X86::BI__builtin_ia32_vpcomw:
15717 case X86::BI__builtin_ia32_vpcomd:
15718 case X86::BI__builtin_ia32_vpcomq:
15719 return EmitX86vpcom(*this, Ops, true);
15720 case X86::BI__builtin_ia32_vpcomub:
15721 case X86::BI__builtin_ia32_vpcomuw:
15722 case X86::BI__builtin_ia32_vpcomud:
15723 case X86::BI__builtin_ia32_vpcomuq:
15724 return EmitX86vpcom(*this, Ops, false);
15725
15726 case X86::BI__builtin_ia32_kortestcqi:
15727 case X86::BI__builtin_ia32_kortestchi:
15728 case X86::BI__builtin_ia32_kortestcsi:
15729 case X86::BI__builtin_ia32_kortestcdi: {
15730 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15731 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15732 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15733 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15734 }
15735 case X86::BI__builtin_ia32_kortestzqi:
15736 case X86::BI__builtin_ia32_kortestzhi:
15737 case X86::BI__builtin_ia32_kortestzsi:
15738 case X86::BI__builtin_ia32_kortestzdi: {
15739 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15740 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15741 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15742 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15743 }
15744
15745 case X86::BI__builtin_ia32_ktestcqi:
15746 case X86::BI__builtin_ia32_ktestzqi:
15747 case X86::BI__builtin_ia32_ktestchi:
15748 case X86::BI__builtin_ia32_ktestzhi:
15749 case X86::BI__builtin_ia32_ktestcsi:
15750 case X86::BI__builtin_ia32_ktestzsi:
15751 case X86::BI__builtin_ia32_ktestcdi:
15752 case X86::BI__builtin_ia32_ktestzdi: {
15753 Intrinsic::ID IID;
15754 switch (BuiltinID) {
15755 default: llvm_unreachable("Unsupported intrinsic!");
15756 case X86::BI__builtin_ia32_ktestcqi:
15757 IID = Intrinsic::x86_avx512_ktestc_b;
15758 break;
15759 case X86::BI__builtin_ia32_ktestzqi:
15760 IID = Intrinsic::x86_avx512_ktestz_b;
15761 break;
15762 case X86::BI__builtin_ia32_ktestchi:
15763 IID = Intrinsic::x86_avx512_ktestc_w;
15764 break;
15765 case X86::BI__builtin_ia32_ktestzhi:
15766 IID = Intrinsic::x86_avx512_ktestz_w;
15767 break;
15768 case X86::BI__builtin_ia32_ktestcsi:
15769 IID = Intrinsic::x86_avx512_ktestc_d;
15770 break;
15771 case X86::BI__builtin_ia32_ktestzsi:
15772 IID = Intrinsic::x86_avx512_ktestz_d;
15773 break;
15774 case X86::BI__builtin_ia32_ktestcdi:
15775 IID = Intrinsic::x86_avx512_ktestc_q;
15776 break;
15777 case X86::BI__builtin_ia32_ktestzdi:
15778 IID = Intrinsic::x86_avx512_ktestz_q;
15779 break;
15780 }
15781
15782 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15783 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15784 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15785 Function *Intr = CGM.getIntrinsic(IID);
15786 return Builder.CreateCall(Intr, {LHS, RHS});
15787 }
15788
15789 case X86::BI__builtin_ia32_kaddqi:
15790 case X86::BI__builtin_ia32_kaddhi:
15791 case X86::BI__builtin_ia32_kaddsi:
15792 case X86::BI__builtin_ia32_kadddi: {
15793 Intrinsic::ID IID;
15794 switch (BuiltinID) {
15795 default: llvm_unreachable("Unsupported intrinsic!");
15796 case X86::BI__builtin_ia32_kaddqi:
15797 IID = Intrinsic::x86_avx512_kadd_b;
15798 break;
15799 case X86::BI__builtin_ia32_kaddhi:
15800 IID = Intrinsic::x86_avx512_kadd_w;
15801 break;
15802 case X86::BI__builtin_ia32_kaddsi:
15803 IID = Intrinsic::x86_avx512_kadd_d;
15804 break;
15805 case X86::BI__builtin_ia32_kadddi:
15806 IID = Intrinsic::x86_avx512_kadd_q;
15807 break;
15808 }
15809
15810 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15811 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15812 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15813 Function *Intr = CGM.getIntrinsic(IID);
15814 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15815 return Builder.CreateBitCast(Res, Ops[0]->getType());
15816 }
15817 case X86::BI__builtin_ia32_kandqi:
15818 case X86::BI__builtin_ia32_kandhi:
15819 case X86::BI__builtin_ia32_kandsi:
15820 case X86::BI__builtin_ia32_kanddi:
15821 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15822 case X86::BI__builtin_ia32_kandnqi:
15823 case X86::BI__builtin_ia32_kandnhi:
15824 case X86::BI__builtin_ia32_kandnsi:
15825 case X86::BI__builtin_ia32_kandndi:
15826 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15827 case X86::BI__builtin_ia32_korqi:
15828 case X86::BI__builtin_ia32_korhi:
15829 case X86::BI__builtin_ia32_korsi:
15830 case X86::BI__builtin_ia32_kordi:
15831 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15832 case X86::BI__builtin_ia32_kxnorqi:
15833 case X86::BI__builtin_ia32_kxnorhi:
15834 case X86::BI__builtin_ia32_kxnorsi:
15835 case X86::BI__builtin_ia32_kxnordi:
15836 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15837 case X86::BI__builtin_ia32_kxorqi:
15838 case X86::BI__builtin_ia32_kxorhi:
15839 case X86::BI__builtin_ia32_kxorsi:
15840 case X86::BI__builtin_ia32_kxordi:
15841 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15842 case X86::BI__builtin_ia32_knotqi:
15843 case X86::BI__builtin_ia32_knothi:
15844 case X86::BI__builtin_ia32_knotsi:
15845 case X86::BI__builtin_ia32_knotdi: {
15846 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15847 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15848 return Builder.CreateBitCast(Builder.CreateNot(Res),
15849 Ops[0]->getType());
15850 }
15851 case X86::BI__builtin_ia32_kmovb:
15852 case X86::BI__builtin_ia32_kmovw:
15853 case X86::BI__builtin_ia32_kmovd:
15854 case X86::BI__builtin_ia32_kmovq: {
15855 // Bitcast to vXi1 type and then back to integer. This gets the mask
15856 // register type into the IR, but might be optimized out depending on
15857 // what's around it.
15858 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15859 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15860 return Builder.CreateBitCast(Res, Ops[0]->getType());
15861 }
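// Illustrative IR (assumption): __builtin_ia32_kmovw(m) round-trips the mask
// through the vector-of-i1 type so the k-register form is visible to the
// optimizer:
//   %v = bitcast i16 %m to <16 x i1>
//   %r = bitcast <16 x i1> %v to i16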
15862
15863 case X86::BI__builtin_ia32_kunpckdi:
15864 case X86::BI__builtin_ia32_kunpcksi:
15865 case X86::BI__builtin_ia32_kunpckhi: {
15866 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15867 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15868 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15869 int Indices[64];
15870 for (unsigned i = 0; i != NumElts; ++i)
15871 Indices[i] = i;
15872
15873 // First extract half of each vector. This gives better codegen than
15874 // doing it in a single shuffle.
15875 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15876 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15877 // Concat the vectors.
15878 // NOTE: Operands are swapped to match the intrinsic definition.
15879 Value *Res =
15880 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15881 return Builder.CreateBitCast(Res, Ops[0]->getType());
15882 }
15883
15884 case X86::BI__builtin_ia32_vplzcntd_128:
15885 case X86::BI__builtin_ia32_vplzcntd_256:
15886 case X86::BI__builtin_ia32_vplzcntd_512:
15887 case X86::BI__builtin_ia32_vplzcntq_128:
15888 case X86::BI__builtin_ia32_vplzcntq_256:
15889 case X86::BI__builtin_ia32_vplzcntq_512: {
15890 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15891 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15892 }
15893 case X86::BI__builtin_ia32_sqrtss:
15894 case X86::BI__builtin_ia32_sqrtsd: {
15895 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15896 Function *F;
15897 if (Builder.getIsFPConstrained()) {
15898 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15899 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15900 A->getType());
15901 A = Builder.CreateConstrainedFPCall(F, {A});
15902 } else {
15903 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15904 A = Builder.CreateCall(F, {A});
15905 }
15906 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15907 }
15908 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15909 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15910 case X86::BI__builtin_ia32_sqrtss_round_mask: {
15911 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15912 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15913 // otherwise keep the intrinsic.
15914 if (CC != 4) {
15915 Intrinsic::ID IID;
15916
15917 switch (BuiltinID) {
15918 default:
15919 llvm_unreachable("Unsupported intrinsic!");
15920 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15921 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15922 break;
15923 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15924 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15925 break;
15926 case X86::BI__builtin_ia32_sqrtss_round_mask:
15927 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15928 break;
15929 }
15930 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15931 }
15932 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15933 Function *F;
15934 if (Builder.getIsFPConstrained()) {
15935 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15936 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15937 A->getType());
15938 A = Builder.CreateConstrainedFPCall(F, A);
15939 } else {
15940 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15941 A = Builder.CreateCall(F, A);
15942 }
15943 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15944 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15945 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15946 }
15947 case X86::BI__builtin_ia32_sqrtpd256:
15948 case X86::BI__builtin_ia32_sqrtpd:
15949 case X86::BI__builtin_ia32_sqrtps256:
15950 case X86::BI__builtin_ia32_sqrtps:
15951 case X86::BI__builtin_ia32_sqrtph256:
15952 case X86::BI__builtin_ia32_sqrtph:
15953 case X86::BI__builtin_ia32_sqrtph512:
15954 case X86::BI__builtin_ia32_sqrtps512:
15955 case X86::BI__builtin_ia32_sqrtpd512: {
15956 if (Ops.size() == 2) {
15957 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15958 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15959 // otherwise keep the intrinsic.
15960 if (CC != 4) {
15961 Intrinsic::ID IID;
15962
15963 switch (BuiltinID) {
15964 default:
15965 llvm_unreachable("Unsupported intrinsic!");
15966 case X86::BI__builtin_ia32_sqrtph512:
15967 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15968 break;
15969 case X86::BI__builtin_ia32_sqrtps512:
15970 IID = Intrinsic::x86_avx512_sqrt_ps_512;
15971 break;
15972 case X86::BI__builtin_ia32_sqrtpd512:
15973 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15974 break;
15975 }
15976 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15977 }
15978 }
15979 if (Builder.getIsFPConstrained()) {
15980 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15981 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15982 Ops[0]->getType());
15983 return Builder.CreateConstrainedFPCall(F, Ops[0]);
15984 } else {
15985 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15986 return Builder.CreateCall(F, Ops[0]);
15987 }
15988 }
15989
15990 case X86::BI__builtin_ia32_pmuludq128:
15991 case X86::BI__builtin_ia32_pmuludq256:
15992 case X86::BI__builtin_ia32_pmuludq512:
15993 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15994
15995 case X86::BI__builtin_ia32_pmuldq128:
15996 case X86::BI__builtin_ia32_pmuldq256:
15997 case X86::BI__builtin_ia32_pmuldq512:
15998 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15999
16000 case X86::BI__builtin_ia32_pternlogd512_mask:
16001 case X86::BI__builtin_ia32_pternlogq512_mask:
16002 case X86::BI__builtin_ia32_pternlogd128_mask:
16003 case X86::BI__builtin_ia32_pternlogd256_mask:
16004 case X86::BI__builtin_ia32_pternlogq128_mask:
16005 case X86::BI__builtin_ia32_pternlogq256_mask:
16006 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16007
16008 case X86::BI__builtin_ia32_pternlogd512_maskz:
16009 case X86::BI__builtin_ia32_pternlogq512_maskz:
16010 case X86::BI__builtin_ia32_pternlogd128_maskz:
16011 case X86::BI__builtin_ia32_pternlogd256_maskz:
16012 case X86::BI__builtin_ia32_pternlogq128_maskz:
16013 case X86::BI__builtin_ia32_pternlogq256_maskz:
16014 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16015
16016 case X86::BI__builtin_ia32_vpshldd128:
16017 case X86::BI__builtin_ia32_vpshldd256:
16018 case X86::BI__builtin_ia32_vpshldd512:
16019 case X86::BI__builtin_ia32_vpshldq128:
16020 case X86::BI__builtin_ia32_vpshldq256:
16021 case X86::BI__builtin_ia32_vpshldq512:
16022 case X86::BI__builtin_ia32_vpshldw128:
16023 case X86::BI__builtin_ia32_vpshldw256:
16024 case X86::BI__builtin_ia32_vpshldw512:
16025 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16026
16027 case X86::BI__builtin_ia32_vpshrdd128:
16028 case X86::BI__builtin_ia32_vpshrdd256:
16029 case X86::BI__builtin_ia32_vpshrdd512:
16030 case X86::BI__builtin_ia32_vpshrdq128:
16031 case X86::BI__builtin_ia32_vpshrdq256:
16032 case X86::BI__builtin_ia32_vpshrdq512:
16033 case X86::BI__builtin_ia32_vpshrdw128:
16034 case X86::BI__builtin_ia32_vpshrdw256:
16035 case X86::BI__builtin_ia32_vpshrdw512:
16036 // Ops 0 and 1 are swapped.
16037 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16038
16039 case X86::BI__builtin_ia32_vpshldvd128:
16040 case X86::BI__builtin_ia32_vpshldvd256:
16041 case X86::BI__builtin_ia32_vpshldvd512:
16042 case X86::BI__builtin_ia32_vpshldvq128:
16043 case X86::BI__builtin_ia32_vpshldvq256:
16044 case X86::BI__builtin_ia32_vpshldvq512:
16045 case X86::BI__builtin_ia32_vpshldvw128:
16046 case X86::BI__builtin_ia32_vpshldvw256:
16047 case X86::BI__builtin_ia32_vpshldvw512:
16048 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16049
16050 case X86::BI__builtin_ia32_vpshrdvd128:
16051 case X86::BI__builtin_ia32_vpshrdvd256:
16052 case X86::BI__builtin_ia32_vpshrdvd512:
16053 case X86::BI__builtin_ia32_vpshrdvq128:
16054 case X86::BI__builtin_ia32_vpshrdvq256:
16055 case X86::BI__builtin_ia32_vpshrdvq512:
16056 case X86::BI__builtin_ia32_vpshrdvw128:
16057 case X86::BI__builtin_ia32_vpshrdvw256:
16058 case X86::BI__builtin_ia32_vpshrdvw512:
16059 // Ops 0 and 1 are swapped.
16060 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16061
16062 // Reductions
16063 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16064 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16065 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16066 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16067 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16068 Function *F =
16069 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16070 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16071 Builder.getFastMathFlags().setAllowReassoc();
16072 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16073 }
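// Illustrative IR (assumption): __builtin_ia32_reduce_fadd_pd512(start, a)
// is emitted as
//   call reassoc double @llvm.vector.reduce.fadd.v8f64(double %start,
//                                                      <8 x double> %a)
// The reassoc fast-math flag set above is what permits a tree reduction
// rather than a strictly ordered one.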
16074 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16075 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16076 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16077 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16078 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16079 Function *F =
16080 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16081 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16082 Builder.getFastMathFlags().setAllowReassoc();
16083 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16084 }
16085 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16086 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16087 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16088 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16089 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16090 Function *F =
16091 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16092 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16093 Builder.getFastMathFlags().setNoNaNs();
16094 return Builder.CreateCall(F, {Ops[0]});
16095 }
16096 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16097 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16098 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16099 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16100 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16101 Function *F =
16102 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16103 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16104 Builder.getFastMathFlags().setNoNaNs();
16105 return Builder.CreateCall(F, {Ops[0]});
16106 }
16107
16108 case X86::BI__builtin_ia32_rdrand16_step:
16109 case X86::BI__builtin_ia32_rdrand32_step:
16110 case X86::BI__builtin_ia32_rdrand64_step:
16111 case X86::BI__builtin_ia32_rdseed16_step:
16112 case X86::BI__builtin_ia32_rdseed32_step:
16113 case X86::BI__builtin_ia32_rdseed64_step: {
16114 Intrinsic::ID ID;
16115 switch (BuiltinID) {
16116 default: llvm_unreachable("Unsupported intrinsic!");
16117 case X86::BI__builtin_ia32_rdrand16_step:
16118 ID = Intrinsic::x86_rdrand_16;
16119 break;
16120 case X86::BI__builtin_ia32_rdrand32_step:
16121 ID = Intrinsic::x86_rdrand_32;
16122 break;
16123 case X86::BI__builtin_ia32_rdrand64_step:
16124 ID = Intrinsic::x86_rdrand_64;
16125 break;
16126 case X86::BI__builtin_ia32_rdseed16_step:
16127 ID = Intrinsic::x86_rdseed_16;
16128 break;
16129 case X86::BI__builtin_ia32_rdseed32_step:
16130 ID = Intrinsic::x86_rdseed_32;
16131 break;
16132 case X86::BI__builtin_ia32_rdseed64_step:
16133 ID = Intrinsic::x86_rdseed_64;
16134 break;
16135 }
16136
16137 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16138 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16139 Ops[0]);
16140 return Builder.CreateExtractValue(Call, 1);
16141 }
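// Illustrative IR (assumption): _rdrand32_step(&r) arrives here as
// __builtin_ia32_rdrand32_step; the intrinsic returns the random value and
// the carry flag as a pair, the value is stored through Ops[0], and the
// flag becomes the builtin's result:
//   %c  = call { i32, i32 } @llvm.x86.rdrand.32()
//   %v  = extractvalue { i32, i32 } %c, 0
//   store i32 %v, ptr %out
//   %ok = extractvalue { i32, i32 } %c, 1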
16142 case X86::BI__builtin_ia32_addcarryx_u32:
16143 case X86::BI__builtin_ia32_addcarryx_u64:
16144 case X86::BI__builtin_ia32_subborrow_u32:
16145 case X86::BI__builtin_ia32_subborrow_u64: {
16146 Intrinsic::ID IID;
16147 switch (BuiltinID) {
16148 default: llvm_unreachable("Unsupported intrinsic!");
16149 case X86::BI__builtin_ia32_addcarryx_u32:
16150 IID = Intrinsic::x86_addcarry_32;
16151 break;
16152 case X86::BI__builtin_ia32_addcarryx_u64:
16153 IID = Intrinsic::x86_addcarry_64;
16154 break;
16155 case X86::BI__builtin_ia32_subborrow_u32:
16156 IID = Intrinsic::x86_subborrow_32;
16157 break;
16158 case X86::BI__builtin_ia32_subborrow_u64:
16159 IID = Intrinsic::x86_subborrow_64;
16160 break;
16161 }
16162
16163 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16164 { Ops[0], Ops[1], Ops[2] });
16165 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16166 Ops[3]);
16167 return Builder.CreateExtractValue(Call, 0);
16168 }
16169
16170 case X86::BI__builtin_ia32_fpclassps128_mask:
16171 case X86::BI__builtin_ia32_fpclassps256_mask:
16172 case X86::BI__builtin_ia32_fpclassps512_mask:
16173 case X86::BI__builtin_ia32_fpclassph128_mask:
16174 case X86::BI__builtin_ia32_fpclassph256_mask:
16175 case X86::BI__builtin_ia32_fpclassph512_mask:
16176 case X86::BI__builtin_ia32_fpclasspd128_mask:
16177 case X86::BI__builtin_ia32_fpclasspd256_mask:
16178 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16179 unsigned NumElts =
16180 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16181 Value *MaskIn = Ops[2];
16182 Ops.erase(&Ops[2]);
16183
16184 Intrinsic::ID ID;
16185 switch (BuiltinID) {
16186 default: llvm_unreachable("Unsupported intrinsic!");
16187 case X86::BI__builtin_ia32_fpclassph128_mask:
16188 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16189 break;
16190 case X86::BI__builtin_ia32_fpclassph256_mask:
16191 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16192 break;
16193 case X86::BI__builtin_ia32_fpclassph512_mask:
16194 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16195 break;
16196 case X86::BI__builtin_ia32_fpclassps128_mask:
16197 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16198 break;
16199 case X86::BI__builtin_ia32_fpclassps256_mask:
16200 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16201 break;
16202 case X86::BI__builtin_ia32_fpclassps512_mask:
16203 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16204 break;
16205 case X86::BI__builtin_ia32_fpclasspd128_mask:
16206 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16207 break;
16208 case X86::BI__builtin_ia32_fpclasspd256_mask:
16209 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16210 break;
16211 case X86::BI__builtin_ia32_fpclasspd512_mask:
16212 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16213 break;
16214 }
16215
16216 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16217 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16218 }
16219
16220 case X86::BI__builtin_ia32_vp2intersect_q_512:
16221 case X86::BI__builtin_ia32_vp2intersect_q_256:
16222 case X86::BI__builtin_ia32_vp2intersect_q_128:
16223 case X86::BI__builtin_ia32_vp2intersect_d_512:
16224 case X86::BI__builtin_ia32_vp2intersect_d_256:
16225 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16226 unsigned NumElts =
16227 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16228 Intrinsic::ID ID;
16229
16230 switch (BuiltinID) {
16231 default: llvm_unreachable("Unsupported intrinsic!");
16232 case X86::BI__builtin_ia32_vp2intersect_q_512:
16233 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16234 break;
16235 case X86::BI__builtin_ia32_vp2intersect_q_256:
16236 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16237 break;
16238 case X86::BI__builtin_ia32_vp2intersect_q_128:
16239 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16240 break;
16241 case X86::BI__builtin_ia32_vp2intersect_d_512:
16242 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16243 break;
16244 case X86::BI__builtin_ia32_vp2intersect_d_256:
16245 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16246 break;
16247 case X86::BI__builtin_ia32_vp2intersect_d_128:
16248 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16249 break;
16250 }
16251
16252 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16253 Value *Result = Builder.CreateExtractValue(Call, 0);
16254 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16255 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16256
16257 Result = Builder.CreateExtractValue(Call, 1);
16258 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16259 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16260 }
16261
16262 case X86::BI__builtin_ia32_vpmultishiftqb128:
16263 case X86::BI__builtin_ia32_vpmultishiftqb256:
16264 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16265 Intrinsic::ID ID;
16266 switch (BuiltinID) {
16267 default: llvm_unreachable("Unsupported intrinsic!");
16268 case X86::BI__builtin_ia32_vpmultishiftqb128:
16269 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16270 break;
16271 case X86::BI__builtin_ia32_vpmultishiftqb256:
16272 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16273 break;
16274 case X86::BI__builtin_ia32_vpmultishiftqb512:
16275 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16276 break;
16277 }
16278
16279 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16280 }
16281
16282 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16283 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16284 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16285 unsigned NumElts =
16286 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16287 Value *MaskIn = Ops[2];
16288 Ops.erase(&Ops[2]);
16289
16290 Intrinsic::ID ID;
16291 switch (BuiltinID) {
16292 default: llvm_unreachable("Unsupported intrinsic!");
16293 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16294 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16295 break;
16296 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16297 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16298 break;
16299 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16300 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16301 break;
16302 }
16303
16304 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16305 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16306 }
16307
16308 // packed comparison intrinsics
16309 case X86::BI__builtin_ia32_cmpeqps:
16310 case X86::BI__builtin_ia32_cmpeqpd:
16311 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16312 case X86::BI__builtin_ia32_cmpltps:
16313 case X86::BI__builtin_ia32_cmpltpd:
16314 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16315 case X86::BI__builtin_ia32_cmpleps:
16316 case X86::BI__builtin_ia32_cmplepd:
16317 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16318 case X86::BI__builtin_ia32_cmpunordps:
16319 case X86::BI__builtin_ia32_cmpunordpd:
16320 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16321 case X86::BI__builtin_ia32_cmpneqps:
16322 case X86::BI__builtin_ia32_cmpneqpd:
16323 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16324 case X86::BI__builtin_ia32_cmpnltps:
16325 case X86::BI__builtin_ia32_cmpnltpd:
16326 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16327 case X86::BI__builtin_ia32_cmpnleps:
16328 case X86::BI__builtin_ia32_cmpnlepd:
16329 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16330 case X86::BI__builtin_ia32_cmpordps:
16331 case X86::BI__builtin_ia32_cmpordpd:
16332 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
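// Illustrative lowering (assumption): __builtin_ia32_cmpltps(a, b) above is
// emitted via getVectorFCmpIR as a signaling ordered compare whose i1 lanes
// are sign-extended to the element width and bitcast back to the builtin's
// float vector return type, roughly:
//   %cmp  = fcmp olt <4 x float> %a, %b
//   %sext = sext <4 x i1> %cmp to <4 x i32>
//   %res  = bitcast <4 x i32> %sext to <4 x float>
// The generic _mm_cmp_ps/_mm_cmp_pd path below decodes its immediate into
// the same predicates before taking the same route.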
16333 case X86::BI__builtin_ia32_cmpph128_mask:
16334 case X86::BI__builtin_ia32_cmpph256_mask:
16335 case X86::BI__builtin_ia32_cmpph512_mask:
16336 case X86::BI__builtin_ia32_cmpps128_mask:
16337 case X86::BI__builtin_ia32_cmpps256_mask:
16338 case X86::BI__builtin_ia32_cmpps512_mask:
16339 case X86::BI__builtin_ia32_cmppd128_mask:
16340 case X86::BI__builtin_ia32_cmppd256_mask:
16341 case X86::BI__builtin_ia32_cmppd512_mask:
16342 IsMaskFCmp = true;
16343 [[fallthrough]];
16344 case X86::BI__builtin_ia32_cmpps:
16345 case X86::BI__builtin_ia32_cmpps256:
16346 case X86::BI__builtin_ia32_cmppd:
16347 case X86::BI__builtin_ia32_cmppd256: {
16348 // Lower vector comparisons to fcmp instructions, ignoring the requested
16349 // signalling behaviour and rounding mode.
16350 // This is only possible if the fp-model is not strict and FENV_ACCESS is
16351 // off.
16352
16353 // The third argument is the comparison condition, an integer in the
16354 // range [0, 31].
16355 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16356
16357 // Lowering to IR fcmp instruction.
16358 // Ignoring requested signaling behaviour,
16359 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16360 FCmpInst::Predicate Pred;
16361 bool IsSignaling;
16362 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16363 // behavior is inverted. We'll handle that after the switch.
16364 switch (CC & 0xf) {
16365 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16366 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16367 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16368 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16369 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16370 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16371 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16372 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16373 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16374 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16375 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16376 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16377 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16378 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16379 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16380 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16381 default: llvm_unreachable("Unhandled CC");
16382 }
16383
16384 // Invert the signalling behavior for 16-31.
16385 if (CC & 0x10)
16386 IsSignaling = !IsSignaling;
16387
16388 // If the predicate is true or false and we're using constrained intrinsics,
16389 // we don't have a compare intrinsic we can use. Just use the legacy X86
16390 // specific intrinsic.
16391 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16392 // use the legacy X86 specific intrinsic.
16393 if (Builder.getIsFPConstrained() &&
16394 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16395 IsMaskFCmp)) {
16396
16397 Intrinsic::ID IID;
16398 switch (BuiltinID) {
16399 default: llvm_unreachable("Unexpected builtin");
16400 case X86::BI__builtin_ia32_cmpps:
16401 IID = Intrinsic::x86_sse_cmp_ps;
16402 break;
16403 case X86::BI__builtin_ia32_cmpps256:
16404 IID = Intrinsic::x86_avx_cmp_ps_256;
16405 break;
16406 case X86::BI__builtin_ia32_cmppd:
16407 IID = Intrinsic::x86_sse2_cmp_pd;
16408 break;
16409 case X86::BI__builtin_ia32_cmppd256:
16410 IID = Intrinsic::x86_avx_cmp_pd_256;
16411 break;
16412 case X86::BI__builtin_ia32_cmpph128_mask:
16413 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16414 break;
16415 case X86::BI__builtin_ia32_cmpph256_mask:
16416 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16417 break;
16418 case X86::BI__builtin_ia32_cmpph512_mask:
16419 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16420 break;
16421 case X86::BI__builtin_ia32_cmpps512_mask:
16422 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16423 break;
16424 case X86::BI__builtin_ia32_cmppd512_mask:
16425 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16426 break;
16427 case X86::BI__builtin_ia32_cmpps128_mask:
16428 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16429 break;
16430 case X86::BI__builtin_ia32_cmpps256_mask:
16431 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16432 break;
16433 case X86::BI__builtin_ia32_cmppd128_mask:
16434 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16435 break;
16436 case X86::BI__builtin_ia32_cmppd256_mask:
16437 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16438 break;
16439 }
16440
16441 Function *Intr = CGM.getIntrinsic(IID);
16442 if (IsMaskFCmp) {
16443 unsigned NumElts =
16444 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16445 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16446 Value *Cmp = Builder.CreateCall(Intr, Ops);
16447 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16448 }
16449
16450 return Builder.CreateCall(Intr, Ops);
16451 }
16452
16453 // Builtins without the _mask suffix return a vector of integers
16454 // of the same width as the input vectors
16455 if (IsMaskFCmp) {
16456 // We ignore SAE if strict FP is disabled. We only keep precise
16457 // exception behavior under strict FP.
16458 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16459 // object will be required.
16460 unsigned NumElts =
16461 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16462 Value *Cmp;
16463 if (IsSignaling)
16464 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16465 else
16466 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16467 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16468 }
16469
16470 return getVectorFCmpIR(Pred, IsSignaling);
16471 }
16472
16473 // SSE scalar comparison intrinsics
16474 case X86::BI__builtin_ia32_cmpeqss:
16475 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16476 case X86::BI__builtin_ia32_cmpltss:
16477 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16478 case X86::BI__builtin_ia32_cmpless:
16479 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16480 case X86::BI__builtin_ia32_cmpunordss:
16481 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16482 case X86::BI__builtin_ia32_cmpneqss:
16483 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16484 case X86::BI__builtin_ia32_cmpnltss:
16485 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16486 case X86::BI__builtin_ia32_cmpnless:
16487 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16488 case X86::BI__builtin_ia32_cmpordss:
16489 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16490 case X86::BI__builtin_ia32_cmpeqsd:
16491 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16492 case X86::BI__builtin_ia32_cmpltsd:
16493 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16494 case X86::BI__builtin_ia32_cmplesd:
16495 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16496 case X86::BI__builtin_ia32_cmpunordsd:
16497 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16498 case X86::BI__builtin_ia32_cmpneqsd:
16499 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16500 case X86::BI__builtin_ia32_cmpnltsd:
16501 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16502 case X86::BI__builtin_ia32_cmpnlesd:
16503 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16504 case X86::BI__builtin_ia32_cmpordsd:
16505 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16506
16507 // f16c half2float intrinsics
16508 case X86::BI__builtin_ia32_vcvtph2ps:
16509 case X86::BI__builtin_ia32_vcvtph2ps256:
16510 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16511 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16512 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16513 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16514 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16515 }
16516
16517 // AVX512 bf16 intrinsics
16518 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16519 Ops[2] = getMaskVecValue(
16520 *this, Ops[2],
16521 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16522 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16523 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16524 }
16525 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16526 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16527
16528 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16529 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16530 Intrinsic::ID IID;
16531 switch (BuiltinID) {
16532 default: llvm_unreachable("Unsupported intrinsic!");
16533 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16534 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16535 break;
16536 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16537 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16538 break;
16539 }
16540 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16541 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16542 }
16543
16544 case X86::BI__cpuid:
16545 case X86::BI__cpuidex: {
16546 Value *FuncId = EmitScalarExpr(E->getArg(1));
16547 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16548 ? EmitScalarExpr(E->getArg(2))
16549 : llvm::ConstantInt::get(Int32Ty, 0);
16550
16551 llvm::StructType *CpuidRetTy =
16552 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16553 llvm::FunctionType *FTy =
16554 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16555
16556 StringRef Asm, Constraints;
16557 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16558 Asm = "cpuid";
16559 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16560 } else {
16561 // x86-64 uses %rbx as the base register, so preserve it.
16562 Asm = "xchgq %rbx, ${1:q}\n"
16563 "cpuid\n"
16564 "xchgq %rbx, ${1:q}";
16565 Constraints = "={ax},=r,={cx},={dx},0,2";
16566 }
16567
16568 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16569 /*hasSideEffects=*/false);
16570 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16571 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16572 Value *Store = nullptr;
16573 for (unsigned i = 0; i < 4; i++) {
16574 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16575 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16576 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16577 }
16578
16579 // Return the last store instruction to signal that we have emitted the
16580 // intrinsic.
16581 return Store;
16582 }
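// For reference, a hypothetical MSVC-style caller of the builtin handled above:
//   int Regs[4];
//   __cpuidex(Regs, 7, 0);  // Regs[0..3] receive EAX, EBX, ECX, EDX
// which matches the four consecutive i32 stores emitted through BasePtr.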
16583
16584 case X86::BI__emul:
16585 case X86::BI__emulu: {
16586 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16587 bool isSigned = (BuiltinID == X86::BI__emul);
16588 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16589 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16590 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16591 }
16592 case X86::BI__mulh:
16593 case X86::BI__umulh:
16594 case X86::BI_mul128:
16595 case X86::BI_umul128: {
16596 llvm::Type *ResType = ConvertType(E->getType());
16597 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16598
16599 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16600 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16601 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16602
16603 Value *MulResult, *HigherBits;
16604 if (IsSigned) {
16605 MulResult = Builder.CreateNSWMul(LHS, RHS);
16606 HigherBits = Builder.CreateAShr(MulResult, 64);
16607 } else {
16608 MulResult = Builder.CreateNUWMul(LHS, RHS);
16609 HigherBits = Builder.CreateLShr(MulResult, 64);
16610 }
16611 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16612
16613 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16614 return HigherBits;
16615
16616 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16617 Builder.CreateStore(HigherBits, HighBitsAddress);
16618 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16619 }
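// A minimal sketch of the lowering above (hypothetical helper, unsigned case):
//   unsigned long long UMulH(unsigned long long A, unsigned long long B) {
//     unsigned __int128 P = (unsigned __int128)A * B; // widen, NUW multiply
//     return (unsigned long long)(P >> 64);           // __umulh: high 64 bits
//   }
// _umul128 additionally stores the high half through its third argument and
// returns the low half.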
16620
16621 case X86::BI__faststorefence: {
16622 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16623 llvm::SyncScope::System);
16624 }
16625 case X86::BI__shiftleft128:
16626 case X86::BI__shiftright128: {
16627 llvm::Function *F = CGM.getIntrinsic(
16628 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16629 Int64Ty);
16630 // Flip low/high ops and zero-extend amount to matching type.
16631 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16632 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16633 std::swap(Ops[0], Ops[1]);
16634 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16635 return Builder.CreateCall(F, Ops);
16636 }
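// Equivalently (hypothetical reading): __shiftleft128(Lo, Hi, N) yields the
// high 64 bits of the 128-bit value Hi:Lo shifted left by N (modulo 64), which
// is exactly llvm.fshl.i64(Hi, Lo, N); __shiftright128 maps to fshr likewise.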
16637 case X86::BI_ReadWriteBarrier:
16638 case X86::BI_ReadBarrier:
16639 case X86::BI_WriteBarrier: {
16640 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16641 llvm::SyncScope::SingleThread);
16642 }
16643
16644 case X86::BI_AddressOfReturnAddress: {
16645 Function *F =
16646 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16647 return Builder.CreateCall(F);
16648 }
16649 case X86::BI__stosb: {
16650 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16651 // instruction, but it will create a memset that won't be optimized away.
16652 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16653 }
16654 case X86::BI__ud2:
16655 // llvm.trap makes a ud2a instruction on x86.
16656 return EmitTrapCall(Intrinsic::trap);
16657 case X86::BI__int2c: {
16658 // This syscall signals a driver assertion failure in x86 NT kernels.
16659 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16660 llvm::InlineAsm *IA =
16661 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16662 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16663 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16664 llvm::Attribute::NoReturn);
16665 llvm::CallInst *CI = Builder.CreateCall(IA);
16666 CI->setAttributes(NoReturnAttr);
16667 return CI;
16668 }
16669 case X86::BI__readfsbyte:
16670 case X86::BI__readfsword:
16671 case X86::BI__readfsdword:
16672 case X86::BI__readfsqword: {
16673 llvm::Type *IntTy = ConvertType(E->getType());
16674 Value *Ptr = Builder.CreateIntToPtr(
16675 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16676 LoadInst *Load = Builder.CreateAlignedLoad(
16677 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16678 Load->setVolatile(true);
16679 return Load;
16680 }
16681 case X86::BI__readgsbyte:
16682 case X86::BI__readgsword:
16683 case X86::BI__readgsdword:
16684 case X86::BI__readgsqword: {
16685 llvm::Type *IntTy = ConvertType(E->getType());
16686 Value *Ptr = Builder.CreateIntToPtr(
16687 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16688 LoadInst *Load = Builder.CreateAlignedLoad(
16689 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16690 Load->setVolatile(true);
16691 return Load;
16692 }
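// Note: in X86 LLVM IR, address space 257 denotes FS-relative and address
// space 256 denotes GS-relative addressing, so __readfs*/__readgs* become
// volatile loads through fs:/gs: segment pointers.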
16693 case X86::BI__builtin_ia32_encodekey128_u32: {
16694 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16695
16696 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16697
16698 for (int i = 0; i < 3; ++i) {
16699 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16700 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16701 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16702 }
16703
16704 return Builder.CreateExtractValue(Call, 0);
16705 }
16706 case X86::BI__builtin_ia32_encodekey256_u32: {
16707 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16708
16709 Value *Call =
16710 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16711
16712 for (int i = 0; i < 4; ++i) {
16713 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16714 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16715 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16716 }
16717
16718 return Builder.CreateExtractValue(Call, 0);
16719 }
16720 case X86::BI__builtin_ia32_aesenc128kl_u8:
16721 case X86::BI__builtin_ia32_aesdec128kl_u8:
16722 case X86::BI__builtin_ia32_aesenc256kl_u8:
16723 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16724 Intrinsic::ID IID;
16725 StringRef BlockName;
16726 switch (BuiltinID) {
16727 default:
16728 llvm_unreachable("Unexpected builtin");
16729 case X86::BI__builtin_ia32_aesenc128kl_u8:
16730 IID = Intrinsic::x86_aesenc128kl;
16731 BlockName = "aesenc128kl";
16732 break;
16733 case X86::BI__builtin_ia32_aesdec128kl_u8:
16734 IID = Intrinsic::x86_aesdec128kl;
16735 BlockName = "aesdec128kl";
16736 break;
16737 case X86::BI__builtin_ia32_aesenc256kl_u8:
16738 IID = Intrinsic::x86_aesenc256kl;
16739 BlockName = "aesenc256kl";
16740 break;
16741 case X86::BI__builtin_ia32_aesdec256kl_u8:
16742 IID = Intrinsic::x86_aesdec256kl;
16743 BlockName = "aesdec256kl";
16744 break;
16745 }
16746
16747 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16748
16749 BasicBlock *NoError =
16750 createBasicBlock(BlockName + "_no_error", this->CurFn);
16751 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16752 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16753
16754 Value *Ret = Builder.CreateExtractValue(Call, 0);
16755 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16756 Value *Out = Builder.CreateExtractValue(Call, 1);
16757 Builder.CreateCondBr(Succ, NoError, Error);
16758
16759 Builder.SetInsertPoint(NoError);
16760 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16761 Builder.CreateBr(End);
16762
16763 Builder.SetInsertPoint(Error);
16764 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16765 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16766 Builder.CreateBr(End);
16767
16768 Builder.SetInsertPoint(End);
16769 return Builder.CreateExtractValue(Call, 0);
16770 }
16771 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16772 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16773 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16774 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16775 Intrinsic::ID IID;
16776 StringRef BlockName;
16777 switch (BuiltinID) {
16778 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16779 IID = Intrinsic::x86_aesencwide128kl;
16780 BlockName = "aesencwide128kl";
16781 break;
16782 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16783 IID = Intrinsic::x86_aesdecwide128kl;
16784 BlockName = "aesdecwide128kl";
16785 break;
16786 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16787 IID = Intrinsic::x86_aesencwide256kl;
16788 BlockName = "aesencwide256kl";
16789 break;
16790 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16791 IID = Intrinsic::x86_aesdecwide256kl;
16792 BlockName = "aesdecwide256kl";
16793 break;
16794 }
16795
16796 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16797 Value *InOps[9];
16798 InOps[0] = Ops[2];
16799 for (int i = 0; i != 8; ++i) {
16800 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16801 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16802 }
16803
16804 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16805
16806 BasicBlock *NoError =
16807 createBasicBlock(BlockName + "_no_error", this->CurFn);
16808 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16809 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16810
16811 Value *Ret = Builder.CreateExtractValue(Call, 0);
16812 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16813 Builder.CreateCondBr(Succ, NoError, Error);
16814
16815 Builder.SetInsertPoint(NoError);
16816 for (int i = 0; i != 8; ++i) {
16817 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16818 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16819 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16820 }
16821 Builder.CreateBr(End);
16822
16823 Builder.SetInsertPoint(Error);
16824 for (int i = 0; i != 8; ++i) {
16825 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16826 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16827 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16828 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16829 }
16830 Builder.CreateBr(End);
16831
16832 Builder.SetInsertPoint(End);
16833 return Builder.CreateExtractValue(Call, 0);
16834 }
16835 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16836 IsConjFMA = true;
16837 [[fallthrough]];
16838 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16839 Intrinsic::ID IID = IsConjFMA
16840 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16841 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16842 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16843 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16844 }
16845 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16846 IsConjFMA = true;
16847 [[fallthrough]];
16848 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16849 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16850 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16851 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16852 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16853 return EmitX86Select(*this, And, Call, Ops[0]);
16854 }
16855 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16856 IsConjFMA = true;
16857 [[fallthrough]];
16858 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16859 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16860 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16861 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16862 static constexpr int Mask[] = {0, 5, 6, 7};
16863 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16864 }
16865 case X86::BI__builtin_ia32_prefetchi:
16866 return Builder.CreateCall(
16867 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16868 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16869 llvm::ConstantInt::get(Int32Ty, 0)});
16870 }
16871}
16872
16873Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16874 const CallExpr *E) {
16875 // Do not emit the builtin arguments directly inside the argument list of a
16876 // function call, because the evaluation order of function arguments is not
16877 // specified in C++. This is important when testing to ensure the arguments
16878 // are emitted in the same order every time. E.g.:
16879 // Instead of:
16880 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16881 // EmitScalarExpr(E->getArg(1)), "swdiv");
16882 // Use:
16883 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16884 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16885 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16886
16887 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16888
16889#include "llvm/TargetParser/PPCTargetParser.def"
16890 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16891 unsigned Mask, CmpInst::Predicate CompOp,
16892 unsigned OpValue) -> Value * {
16893 if (SupportMethod == BUILTIN_PPC_FALSE)
16894 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16895
16896 if (SupportMethod == BUILTIN_PPC_TRUE)
16897 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16898
16899 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
16900
16901 llvm::Value *FieldValue = nullptr;
16902 if (SupportMethod == USE_SYS_CONF) {
16903 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16904 llvm::Constant *SysConf =
16905 CGM.CreateRuntimeVariable(STy, "_system_configuration");
16906
16907 // Grab the appropriate field from _system_configuration.
16908 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
16909 ConstantInt::get(Int32Ty, FieldIdx)};
16910
16911 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
16912 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
16913 CharUnits::fromQuantity(4));
16914 } else if (SupportMethod == SYS_CALL) {
16915 llvm::FunctionType *FTy =
16916 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
16917 llvm::FunctionCallee Func =
16918 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
16919
16920 FieldValue =
16921 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
16922 }
16923 assert(FieldValue &&
16924 "SupportMethod value is not defined in PPCTargetParser.def.");
16925
16926 if (Mask)
16927 FieldValue = Builder.CreateAnd(FieldValue, Mask);
16928
16929 llvm::Type *ValueType = FieldValue->getType();
16930 bool IsValueType64Bit = ValueType->isIntegerTy(64);
16931 assert(
16932 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
16933 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16934
16935 return Builder.CreateICmp(
16936 CompOp, FieldValue,
16937 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
16938 };
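// In other words, for USE_SYS_CONF the lambda loads the requested word from
// the AIX _system_configuration structure, optionally masks it, and compares
// it against OpValue; for SYS_CALL it obtains the same datum by calling
// getsystemcfg(FieldIdx) and compares that instead.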
16939
16940 switch (BuiltinID) {
16941 default: return nullptr;
16942
16943 case Builtin::BI__builtin_cpu_is: {
16944 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16945 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16946 llvm::Triple Triple = getTarget().getTriple();
16947
16948 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
16949 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
16950
16951 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
16952 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
16953#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
16954 AIXID) \
16955 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
16956#include "llvm/TargetParser/PPCTargetParser.def"
16957 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
16958 BUILTIN_PPC_UNSUPPORTED, 0}));
16959
16960 if (Triple.isOSAIX()) {
16961 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
16962 "Invalid CPU name. Missed by SemaChecking?");
16963 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
16964 ICmpInst::ICMP_EQ, AIXIDValue);
16965 }
16966
16967 assert(Triple.isOSLinux() &&
16968 "__builtin_cpu_is() is only supported for AIX and Linux.");
16969
16970 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
16971 "Invalid CPU name. Missed by SemaChecking?");
16972
16973 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
16974 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16975
16976 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
16977 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16978 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
16979 return Builder.CreateICmpEQ(TheCall,
16980 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
16981 }
16982 case Builtin::BI__builtin_cpu_supports: {
16983 llvm::Triple Triple = getTarget().getTriple();
16984 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16985 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16986 if (Triple.isOSAIX()) {
16987 unsigned SupportMethod, FieldIdx, Mask, Value;
16988 CmpInst::Predicate CompOp;
16989 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
16990 unsigned>
16991 CPUSupportType;
16992 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
16993 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
16994#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
16995 VALUE) \
16996 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
16997#include "llvm/TargetParser/PPCTargetParser.def"
16998 .Default({BUILTIN_PPC_FALSE, 0, 0,
16999 CmpInst::Predicate(), 0}));
17000 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17001 Value);
17002 }
17003
17004 assert(Triple.isOSLinux() &&
17005 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17006 unsigned FeatureWord;
17007 unsigned BitMask;
17008 std::tie(FeatureWord, BitMask) =
17009 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17010#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17011 .Case(Name, {FA_WORD, Bitmask})
17012#include "llvm/TargetParser/PPCTargetParser.def"
17013 .Default({0, 0});
17014 if (!BitMask)
17015 return Builder.getFalse();
17016 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17017 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17018 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17019 Value *Mask =
17020 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17021 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17022#undef PPC_FAWORD_HWCAP
17023#undef PPC_FAWORD_HWCAP2
17024#undef PPC_FAWORD_CPUID
17025 }
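// For example (hypothetical usage), on Linux
//   __builtin_cpu_supports("vsx")
// lowers to "load the hwcap word via ppc_fixed_addr_ld, AND with the feature
// bit, compare != 0", exactly the sequence emitted above.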
17026
17027 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17028 // call __builtin_readcyclecounter.
17029 case PPC::BI__builtin_ppc_get_timebase:
17030 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17031
17032 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17033 case PPC::BI__builtin_altivec_lvx:
17034 case PPC::BI__builtin_altivec_lvxl:
17035 case PPC::BI__builtin_altivec_lvebx:
17036 case PPC::BI__builtin_altivec_lvehx:
17037 case PPC::BI__builtin_altivec_lvewx:
17038 case PPC::BI__builtin_altivec_lvsl:
17039 case PPC::BI__builtin_altivec_lvsr:
17040 case PPC::BI__builtin_vsx_lxvd2x:
17041 case PPC::BI__builtin_vsx_lxvw4x:
17042 case PPC::BI__builtin_vsx_lxvd2x_be:
17043 case PPC::BI__builtin_vsx_lxvw4x_be:
17044 case PPC::BI__builtin_vsx_lxvl:
17045 case PPC::BI__builtin_vsx_lxvll:
17046 {
17047 SmallVector<Value *, 2> Ops;
17048 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17049 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17050 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17051 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17052 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17053 Ops.pop_back();
17054 }
17055
17056 switch (BuiltinID) {
17057 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17058 case PPC::BI__builtin_altivec_lvx:
17059 ID = Intrinsic::ppc_altivec_lvx;
17060 break;
17061 case PPC::BI__builtin_altivec_lvxl:
17062 ID = Intrinsic::ppc_altivec_lvxl;
17063 break;
17064 case PPC::BI__builtin_altivec_lvebx:
17065 ID = Intrinsic::ppc_altivec_lvebx;
17066 break;
17067 case PPC::BI__builtin_altivec_lvehx:
17068 ID = Intrinsic::ppc_altivec_lvehx;
17069 break;
17070 case PPC::BI__builtin_altivec_lvewx:
17071 ID = Intrinsic::ppc_altivec_lvewx;
17072 break;
17073 case PPC::BI__builtin_altivec_lvsl:
17074 ID = Intrinsic::ppc_altivec_lvsl;
17075 break;
17076 case PPC::BI__builtin_altivec_lvsr:
17077 ID = Intrinsic::ppc_altivec_lvsr;
17078 break;
17079 case PPC::BI__builtin_vsx_lxvd2x:
17080 ID = Intrinsic::ppc_vsx_lxvd2x;
17081 break;
17082 case PPC::BI__builtin_vsx_lxvw4x:
17083 ID = Intrinsic::ppc_vsx_lxvw4x;
17084 break;
17085 case PPC::BI__builtin_vsx_lxvd2x_be:
17086 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17087 break;
17088 case PPC::BI__builtin_vsx_lxvw4x_be:
17089 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17090 break;
17091 case PPC::BI__builtin_vsx_lxvl:
17092 ID = Intrinsic::ppc_vsx_lxvl;
17093 break;
17094 case PPC::BI__builtin_vsx_lxvll:
17095 ID = Intrinsic::ppc_vsx_lxvll;
17096 break;
17097 }
17098 llvm::Function *F = CGM.getIntrinsic(ID);
17099 return Builder.CreateCall(F, Ops, "");
17100 }
17101
17102 // vec_st, vec_xst_be
17103 case PPC::BI__builtin_altivec_stvx:
17104 case PPC::BI__builtin_altivec_stvxl:
17105 case PPC::BI__builtin_altivec_stvebx:
17106 case PPC::BI__builtin_altivec_stvehx:
17107 case PPC::BI__builtin_altivec_stvewx:
17108 case PPC::BI__builtin_vsx_stxvd2x:
17109 case PPC::BI__builtin_vsx_stxvw4x:
17110 case PPC::BI__builtin_vsx_stxvd2x_be:
17111 case PPC::BI__builtin_vsx_stxvw4x_be:
17112 case PPC::BI__builtin_vsx_stxvl:
17113 case PPC::BI__builtin_vsx_stxvll:
17114 {
17115 SmallVector<Value *, 3> Ops;
17116 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17117 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17118 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17119 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17120 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17121 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17122 Ops.pop_back();
17123 }
17124
17125 switch (BuiltinID) {
17126 default: llvm_unreachable("Unsupported st intrinsic!");
17127 case PPC::BI__builtin_altivec_stvx:
17128 ID = Intrinsic::ppc_altivec_stvx;
17129 break;
17130 case PPC::BI__builtin_altivec_stvxl:
17131 ID = Intrinsic::ppc_altivec_stvxl;
17132 break;
17133 case PPC::BI__builtin_altivec_stvebx:
17134 ID = Intrinsic::ppc_altivec_stvebx;
17135 break;
17136 case PPC::BI__builtin_altivec_stvehx:
17137 ID = Intrinsic::ppc_altivec_stvehx;
17138 break;
17139 case PPC::BI__builtin_altivec_stvewx:
17140 ID = Intrinsic::ppc_altivec_stvewx;
17141 break;
17142 case PPC::BI__builtin_vsx_stxvd2x:
17143 ID = Intrinsic::ppc_vsx_stxvd2x;
17144 break;
17145 case PPC::BI__builtin_vsx_stxvw4x:
17146 ID = Intrinsic::ppc_vsx_stxvw4x;
17147 break;
17148 case PPC::BI__builtin_vsx_stxvd2x_be:
17149 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17150 break;
17151 case PPC::BI__builtin_vsx_stxvw4x_be:
17152 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17153 break;
17154 case PPC::BI__builtin_vsx_stxvl:
17155 ID = Intrinsic::ppc_vsx_stxvl;
17156 break;
17157 case PPC::BI__builtin_vsx_stxvll:
17158 ID = Intrinsic::ppc_vsx_stxvll;
17159 break;
17160 }
17161 llvm::Function *F = CGM.getIntrinsic(ID);
17162 return Builder.CreateCall(F, Ops, "");
17163 }
17164 case PPC::BI__builtin_vsx_ldrmb: {
17165 // Essentially boils down to performing an unaligned VMX load sequence so
17166 // as to avoid crossing a page boundary and then shuffling the elements
17167 // into the right side of the vector register.
17168 Value *Op0 = EmitScalarExpr(E->getArg(0));
17169 Value *Op1 = EmitScalarExpr(E->getArg(1));
17170 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17171 llvm::Type *ResTy = ConvertType(E->getType());
17172 bool IsLE = getTarget().isLittleEndian();
17173
17174 // If the user wants the entire vector, just load the entire vector.
17175 if (NumBytes == 16) {
17176 Value *LD =
17177 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17178 if (!IsLE)
17179 return LD;
17180
17181 // Reverse the bytes on LE.
17182 SmallVector<int, 16> RevMask;
17183 for (int Idx = 0; Idx < 16; Idx++)
17184 RevMask.push_back(15 - Idx);
17185 return Builder.CreateShuffleVector(LD, LD, RevMask);
17186 }
17187
17188 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17189 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17190 : Intrinsic::ppc_altivec_lvsl);
17191 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17192 Value *HiMem = Builder.CreateGEP(
17193 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17194 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17195 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17196 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17197
17198 Op0 = IsLE ? HiLd : LoLd;
17199 Op1 = IsLE ? LoLd : HiLd;
17200 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17201 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17202
17203 if (IsLE) {
17204 SmallVector<int, 16> Consts;
17205 for (int Idx = 0; Idx < 16; Idx++) {
17206 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17207 : 16 - (NumBytes - Idx);
17208 Consts.push_back(Val);
17209 }
17210 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17211 Zero, Consts);
17212 }
17213 SmallVector<Constant *, 16> Consts;
17214 for (int Idx = 0; Idx < 16; Idx++)
17215 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17216 Value *Mask2 = ConstantVector::get(Consts);
17217 return Builder.CreateBitCast(
17218 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17219 }
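// The idea, in brief: lvx ignores the low four address bits, so loading the
// aligned 16-byte blocks that contain the first and the last requested byte
// never touches a block beyond the requested data (and thus cannot fault on
// the next page); lvsl/lvsr plus vperm then splice the NumBytes requested
// bytes into one register, and the final shuffle zero-fills the rest.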
17220 case PPC::BI__builtin_vsx_strmb: {
17221 Value *Op0 = EmitScalarExpr(E->getArg(0));
17222 Value *Op1 = EmitScalarExpr(E->getArg(1));
17223 Value *Op2 = EmitScalarExpr(E->getArg(2));
17224 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17225 bool IsLE = getTarget().isLittleEndian();
17226 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17227 // If storing the whole vector, simply store it on BE; on LE, reverse the
17228 // bytes first and then store.
17229 if (Width == 16) {
17230 Value *StVec = Op2;
17231 if (IsLE) {
17232 SmallVector<int, 16> RevMask;
17233 for (int Idx = 0; Idx < 16; Idx++)
17234 RevMask.push_back(15 - Idx);
17235 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17236 }
17237 return Builder.CreateStore(
17238 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17239 }
17240 auto *ConvTy = Int64Ty;
17241 unsigned NumElts = 0;
17242 switch (Width) {
17243 default:
17244 llvm_unreachable("width for stores must be a power of 2");
17245 case 8:
17246 ConvTy = Int64Ty;
17247 NumElts = 2;
17248 break;
17249 case 4:
17250 ConvTy = Int32Ty;
17251 NumElts = 4;
17252 break;
17253 case 2:
17254 ConvTy = Int16Ty;
17255 NumElts = 8;
17256 break;
17257 case 1:
17258 ConvTy = Int8Ty;
17259 NumElts = 16;
17260 break;
17261 }
17262 Value *Vec = Builder.CreateBitCast(
17263 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17264 Value *Ptr =
17265 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17266 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17267 if (IsLE && Width > 1) {
17268 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17269 Elt = Builder.CreateCall(F, Elt);
17270 }
17271 return Builder.CreateStore(
17272 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17273 };
17274 unsigned Stored = 0;
17275 unsigned RemainingBytes = NumBytes;
17276 Value *Result;
17277 if (NumBytes == 16)
17278 return StoreSubVec(16, 0, 0);
17279 if (NumBytes >= 8) {
17280 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17281 RemainingBytes -= 8;
17282 Stored += 8;
17283 }
17284 if (RemainingBytes >= 4) {
17285 Result = StoreSubVec(4, NumBytes - Stored - 4,
17286 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17287 RemainingBytes -= 4;
17288 Stored += 4;
17289 }
17290 if (RemainingBytes >= 2) {
17291 Result = StoreSubVec(2, NumBytes - Stored - 2,
17292 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17293 RemainingBytes -= 2;
17294 Stored += 2;
17295 }
17296 if (RemainingBytes)
17297 Result =
17298 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17299 return Result;
17300 }
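// Example of the decomposition above: for NumBytes == 7 the calls are
// StoreSubVec(4, 3, ...), StoreSubVec(2, 1, ...) and StoreSubVec(1, 0, ...),
// i.e. a 4-, a 2- and a 1-byte store that together cover bytes [0, 7) without
// writing past the requested length.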
17301 // Square root
17302 case PPC::BI__builtin_vsx_xvsqrtsp:
17303 case PPC::BI__builtin_vsx_xvsqrtdp: {
17304 llvm::Type *ResultType = ConvertType(E->getType());
17305 Value *X = EmitScalarExpr(E->getArg(0));
17306 if (Builder.getIsFPConstrained()) {
17307 llvm::Function *F = CGM.getIntrinsic(
17308 Intrinsic::experimental_constrained_sqrt, ResultType);
17309 return Builder.CreateConstrainedFPCall(F, X);
17310 } else {
17311 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17312 return Builder.CreateCall(F, X);
17313 }
17314 }
17315 // Count leading zeros
17316 case PPC::BI__builtin_altivec_vclzb:
17317 case PPC::BI__builtin_altivec_vclzh:
17318 case PPC::BI__builtin_altivec_vclzw:
17319 case PPC::BI__builtin_altivec_vclzd: {
17320 llvm::Type *ResultType = ConvertType(E->getType());
17321 Value *X = EmitScalarExpr(E->getArg(0));
17322 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17323 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17324 return Builder.CreateCall(F, {X, Undef});
17325 }
17326 case PPC::BI__builtin_altivec_vctzb:
17327 case PPC::BI__builtin_altivec_vctzh:
17328 case PPC::BI__builtin_altivec_vctzw:
17329 case PPC::BI__builtin_altivec_vctzd: {
17330 llvm::Type *ResultType = ConvertType(E->getType());
17331 Value *X = EmitScalarExpr(E->getArg(0));
17332 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17333 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17334 return Builder.CreateCall(F, {X, Undef});
17335 }
17336 case PPC::BI__builtin_altivec_vinsd:
17337 case PPC::BI__builtin_altivec_vinsw:
17338 case PPC::BI__builtin_altivec_vinsd_elt:
17339 case PPC::BI__builtin_altivec_vinsw_elt: {
17340 llvm::Type *ResultType = ConvertType(E->getType());
17341 Value *Op0 = EmitScalarExpr(E->getArg(0));
17342 Value *Op1 = EmitScalarExpr(E->getArg(1));
17343 Value *Op2 = EmitScalarExpr(E->getArg(2));
17344
17345 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17346 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17347
17348 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17349 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17350
17351 // The third argument must be a compile time constant.
17352 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17353 assert(ArgCI &&
17354 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17355
17356 // The valid range of the third argument depends on the input type and the
17357 // builtin called.
17358 int ValidMaxValue = 0;
17359 if (IsUnaligned)
17360 ValidMaxValue = (Is32bit) ? 12 : 8;
17361 else
17362 ValidMaxValue = (Is32bit) ? 3 : 1;
17363
17364 // Get value of third argument.
17365 int64_t ConstArg = ArgCI->getSExtValue();
17366
17367 // Compose range checking error message.
17368 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17369 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17370 RangeErrMsg += " is outside of the valid range [0, ";
17371 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17372
17373 // Issue error if third argument is not within the valid range.
17374 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17375 CGM.Error(E->getExprLoc(), RangeErrMsg);
17376
17377 // Input to vec_replace_elt is an element index, convert to byte index.
17378 if (!IsUnaligned) {
17379 ConstArg *= Is32bit ? 4 : 8;
17380 // Fix the constant according to endianness.
17381 if (getTarget().isLittleEndian())
17382 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17383 }
17384
17385 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17386 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17387 // Casting input to vector int as per intrinsic definition.
17388 Op0 =
17389 Is32bit
17390 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17391 : Builder.CreateBitCast(Op0,
17392 llvm::FixedVectorType::get(Int64Ty, 2));
17393 return Builder.CreateBitCast(
17394 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17395 }
17396 case PPC::BI__builtin_altivec_vpopcntb:
17397 case PPC::BI__builtin_altivec_vpopcnth:
17398 case PPC::BI__builtin_altivec_vpopcntw:
17399 case PPC::BI__builtin_altivec_vpopcntd: {
17400 llvm::Type *ResultType = ConvertType(E->getType());
17401 Value *X = EmitScalarExpr(E->getArg(0));
17402 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17403 return Builder.CreateCall(F, X);
17404 }
17405 case PPC::BI__builtin_altivec_vadduqm:
17406 case PPC::BI__builtin_altivec_vsubuqm: {
17407 Value *Op0 = EmitScalarExpr(E->getArg(0));
17408 Value *Op1 = EmitScalarExpr(E->getArg(1));
17409 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17410 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17411 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17412 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17413 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17414 else
17415 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17416 }
17417 case PPC::BI__builtin_altivec_vaddcuq_c:
17418 case PPC::BI__builtin_altivec_vsubcuq_c: {
17419 SmallVector<Value *, 2> Ops;
17420 Value *Op0 = EmitScalarExpr(E->getArg(0));
17421 Value *Op1 = EmitScalarExpr(E->getArg(1));
17422 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17423 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17424 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17425 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17426 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17427 ? Intrinsic::ppc_altivec_vaddcuq
17428 : Intrinsic::ppc_altivec_vsubcuq;
17429 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17430 }
17431 case PPC::BI__builtin_altivec_vaddeuqm_c:
17432 case PPC::BI__builtin_altivec_vaddecuq_c:
17433 case PPC::BI__builtin_altivec_vsubeuqm_c:
17434 case PPC::BI__builtin_altivec_vsubecuq_c: {
17435 SmallVector<Value *, 3> Ops;
17436 Value *Op0 = EmitScalarExpr(E->getArg(0));
17437 Value *Op1 = EmitScalarExpr(E->getArg(1));
17438 Value *Op2 = EmitScalarExpr(E->getArg(2));
17439 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17440 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17441 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17442 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17443 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17444 switch (BuiltinID) {
17445 default:
17446 llvm_unreachable("Unsupported intrinsic!");
17447 case PPC::BI__builtin_altivec_vaddeuqm_c:
17448 ID = Intrinsic::ppc_altivec_vaddeuqm;
17449 break;
17450 case PPC::BI__builtin_altivec_vaddecuq_c:
17451 ID = Intrinsic::ppc_altivec_vaddecuq;
17452 break;
17453 case PPC::BI__builtin_altivec_vsubeuqm_c:
17454 ID = Intrinsic::ppc_altivec_vsubeuqm;
17455 break;
17456 case PPC::BI__builtin_altivec_vsubecuq_c:
17457 ID = Intrinsic::ppc_altivec_vsubecuq;
17458 break;
17459 }
17460 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17461 }
17462 case PPC::BI__builtin_ppc_rldimi:
17463 case PPC::BI__builtin_ppc_rlwimi: {
17464 Value *Op0 = EmitScalarExpr(E->getArg(0));
17465 Value *Op1 = EmitScalarExpr(E->getArg(1));
17466 Value *Op2 = EmitScalarExpr(E->getArg(2));
17467 Value *Op3 = EmitScalarExpr(E->getArg(3));
17468 // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
17469 // before isel to leverage peepholes and avoid legalization effort.
17470 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17471 !getTarget().getTriple().isPPC64()) {
17472 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17473 Op2 = Builder.CreateZExt(Op2, Int64Ty);
17474 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17475 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17476 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17477 }
17478 return Builder.CreateCall(
17479 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17480 ? Intrinsic::ppc_rldimi
17481 : Intrinsic::ppc_rlwimi),
17482 {Op0, Op1, Op2, Op3});
17483 }
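// Sketch of the 32-bit expansion above: the result is
//   (rotl64(Op0, Op2) & Op3) | (Op1 & ~Op3)
// i.e. "rotate left, then insert under mask", which matches rldimi semantics
// without needing the 64-bit intrinsic to be legalized on a 32-bit target.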
17484 case PPC::BI__builtin_ppc_rlwnm: {
17485 Value *Op0 = EmitScalarExpr(E->getArg(0));
17486 Value *Op1 = EmitScalarExpr(E->getArg(1));
17487 Value *Op2 = EmitScalarExpr(E->getArg(2));
17488 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17489 {Op0, Op1, Op2});
17490 }
17491 case PPC::BI__builtin_ppc_poppar4:
17492 case PPC::BI__builtin_ppc_poppar8: {
17493 Value *Op0 = EmitScalarExpr(E->getArg(0));
17494 llvm::Type *ArgType = Op0->getType();
17495 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17496 Value *Tmp = Builder.CreateCall(F, Op0);
17497
17498 llvm::Type *ResultType = ConvertType(E->getType());
17499 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17500 if (Result->getType() != ResultType)
17501 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17502 "cast");
17503 return Result;
17504 }
17505 case PPC::BI__builtin_ppc_cmpb: {
17506 Value *Op0 = EmitScalarExpr(E->getArg(0));
17507 Value *Op1 = EmitScalarExpr(E->getArg(1));
17508 if (getTarget().getTriple().isPPC64()) {
17509 Function *F =
17510 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17511 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17512 }
17513 // For 32 bit, emit the code as below:
17514 // %conv = trunc i64 %a to i32
17515 // %conv1 = trunc i64 %b to i32
17516 // %shr = lshr i64 %a, 32
17517 // %conv2 = trunc i64 %shr to i32
17518 // %shr3 = lshr i64 %b, 32
17519 // %conv4 = trunc i64 %shr3 to i32
17520 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17521 // %conv5 = zext i32 %0 to i64
17522 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17523 // %conv614 = zext i32 %1 to i64
17524 // %shl = shl nuw i64 %conv614, 32
17525 // %or = or i64 %shl, %conv5
17526 // ret i64 %or
17527 Function *F =
17528 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17529 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17530 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17531 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17532 Value *ArgOneHi =
17533 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17534 Value *ArgTwoHi =
17535 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17536 Value *ResLo = Builder.CreateZExt(
17537 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17538 Value *ResHiShift = Builder.CreateZExt(
17539 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17540 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17541 return Builder.CreateOr(ResLo, ResHi);
17542 }
17543 // Copy sign
17544 case PPC::BI__builtin_vsx_xvcpsgnsp:
17545 case PPC::BI__builtin_vsx_xvcpsgndp: {
17546 llvm::Type *ResultType = ConvertType(E->getType());
17547 Value *X = EmitScalarExpr(E->getArg(0));
17548 Value *Y = EmitScalarExpr(E->getArg(1));
17549 ID = Intrinsic::copysign;
17550 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17551 return Builder.CreateCall(F, {X, Y});
17552 }
17553 // Rounding/truncation
17554 case PPC::BI__builtin_vsx_xvrspip:
17555 case PPC::BI__builtin_vsx_xvrdpip:
17556 case PPC::BI__builtin_vsx_xvrdpim:
17557 case PPC::BI__builtin_vsx_xvrspim:
17558 case PPC::BI__builtin_vsx_xvrdpi:
17559 case PPC::BI__builtin_vsx_xvrspi:
17560 case PPC::BI__builtin_vsx_xvrdpic:
17561 case PPC::BI__builtin_vsx_xvrspic:
17562 case PPC::BI__builtin_vsx_xvrdpiz:
17563 case PPC::BI__builtin_vsx_xvrspiz: {
17564 llvm::Type *ResultType = ConvertType(E->getType());
17565 Value *X = EmitScalarExpr(E->getArg(0));
17566 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17567 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17568 ID = Builder.getIsFPConstrained()
17569 ? Intrinsic::experimental_constrained_floor
17570 : Intrinsic::floor;
17571 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17572 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17573 ID = Builder.getIsFPConstrained()
17574 ? Intrinsic::experimental_constrained_round
17575 : Intrinsic::round;
17576 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17577 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17578 ID = Builder.getIsFPConstrained()
17579 ? Intrinsic::experimental_constrained_rint
17580 : Intrinsic::rint;
17581 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17582 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17583 ID = Builder.getIsFPConstrained()
17584 ? Intrinsic::experimental_constrained_ceil
17585 : Intrinsic::ceil;
17586 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17587 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17588 ID = Builder.getIsFPConstrained()
17589 ? Intrinsic::experimental_constrained_trunc
17590 : Intrinsic::trunc;
17591 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17592 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17593 : Builder.CreateCall(F, X);
17594 }
17595
17596 // Absolute value
17597 case PPC::BI__builtin_vsx_xvabsdp:
17598 case PPC::BI__builtin_vsx_xvabssp: {
17599 llvm::Type *ResultType = ConvertType(E->getType());
17600 Value *X = EmitScalarExpr(E->getArg(0));
17601 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17602 return Builder.CreateCall(F, X);
17603 }
17604
17605 // Fastmath by default
17606 case PPC::BI__builtin_ppc_recipdivf:
17607 case PPC::BI__builtin_ppc_recipdivd:
17608 case PPC::BI__builtin_ppc_rsqrtf:
17609 case PPC::BI__builtin_ppc_rsqrtd: {
17610 FastMathFlags FMF = Builder.getFastMathFlags();
17611 Builder.getFastMathFlags().setFast();
17612 llvm::Type *ResultType = ConvertType(E->getType());
17613 Value *X = EmitScalarExpr(E->getArg(0));
17614
17615 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17616 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17617 Value *Y = EmitScalarExpr(E->getArg(1));
17618 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17619 Builder.getFastMathFlags() &= (FMF);
17620 return FDiv;
17621 }
17622 auto *One = ConstantFP::get(ResultType, 1.0);
17623 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17624 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17625 Builder.getFastMathFlags() &= (FMF);
17626 return FDiv;
17627 }
17628 case PPC::BI__builtin_ppc_alignx: {
17629 Value *Op0 = EmitScalarExpr(E->getArg(0));
17630 Value *Op1 = EmitScalarExpr(E->getArg(1));
17631 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17632 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17633 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17634 llvm::Value::MaximumAlignment);
17635
17636 emitAlignmentAssumption(Op1, E->getArg(1),
17637 /*The expr loc is sufficient.*/ SourceLocation(),
17638 AlignmentCI, nullptr);
17639 return Op1;
17640 }
17641 case PPC::BI__builtin_ppc_rdlam: {
17642 Value *Op0 = EmitScalarExpr(E->getArg(0));
17643 Value *Op1 = EmitScalarExpr(E->getArg(1));
17644 Value *Op2 = EmitScalarExpr(E->getArg(2));
17645 llvm::Type *Ty = Op0->getType();
17646 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17647 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17648 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17649 return Builder.CreateAnd(Rotate, Op2);
17650 }
17651 case PPC::BI__builtin_ppc_load2r: {
17652 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17653 Value *Op0 = EmitScalarExpr(E->getArg(0));
17654 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17655 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17656 }
17657 // FMA variations
17658 case PPC::BI__builtin_ppc_fnmsub:
17659 case PPC::BI__builtin_ppc_fnmsubs:
17660 case PPC::BI__builtin_vsx_xvmaddadp:
17661 case PPC::BI__builtin_vsx_xvmaddasp:
17662 case PPC::BI__builtin_vsx_xvnmaddadp:
17663 case PPC::BI__builtin_vsx_xvnmaddasp:
17664 case PPC::BI__builtin_vsx_xvmsubadp:
17665 case PPC::BI__builtin_vsx_xvmsubasp:
17666 case PPC::BI__builtin_vsx_xvnmsubadp:
17667 case PPC::BI__builtin_vsx_xvnmsubasp: {
17668 llvm::Type *ResultType = ConvertType(E->getType());
17669 Value *X = EmitScalarExpr(E->getArg(0));
17670 Value *Y = EmitScalarExpr(E->getArg(1));
17671 Value *Z = EmitScalarExpr(E->getArg(2));
17672 llvm::Function *F;
17673 if (Builder.getIsFPConstrained())
17674 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17675 else
17676 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17677 switch (BuiltinID) {
17678 case PPC::BI__builtin_vsx_xvmaddadp:
17679 case PPC::BI__builtin_vsx_xvmaddasp:
17680 if (Builder.getIsFPConstrained())
17681 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17682 else
17683 return Builder.CreateCall(F, {X, Y, Z});
17684 case PPC::BI__builtin_vsx_xvnmaddadp:
17685 case PPC::BI__builtin_vsx_xvnmaddasp:
17686 if (Builder.getIsFPConstrained())
17687 return Builder.CreateFNeg(
17688 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17689 else
17690 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17691 case PPC::BI__builtin_vsx_xvmsubadp:
17692 case PPC::BI__builtin_vsx_xvmsubasp:
17693 if (Builder.getIsFPConstrained())
17694 return Builder.CreateConstrainedFPCall(
17695 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17696 else
17697 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17698 case PPC::BI__builtin_ppc_fnmsub:
17699 case PPC::BI__builtin_ppc_fnmsubs:
17700 case PPC::BI__builtin_vsx_xvnmsubadp:
17701 case PPC::BI__builtin_vsx_xvnmsubasp:
17702 if (Builder.getIsFPConstrained())
17703 return Builder.CreateFNeg(
17704 Builder.CreateConstrainedFPCall(
17705 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17706 "neg");
17707 else
17708 return Builder.CreateCall(
17709 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17710 }
17711 llvm_unreachable("Unknown FMA operation");
17712 return nullptr; // Suppress no-return warning
17713 }
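// For instance, in the constrained-FP path __builtin_ppc_fnmsub(X, Y, Z) is
// emitted as -fma(X, Y, -Z), i.e. -(X*Y - Z); in the default path it maps
// directly onto the ppc_fnmsub intrinsic.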
17714
17715 case PPC::BI__builtin_vsx_insertword: {
17716 Value *Op0 = EmitScalarExpr(E->getArg(0));
17717 Value *Op1 = EmitScalarExpr(E->getArg(1));
17718 Value *Op2 = EmitScalarExpr(E->getArg(2));
17719 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17720
17721 // Third argument is a compile time constant int. It must be clamped to
17722 // the range [0, 12].
17723 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17724 assert(ArgCI &&
17725 "Third arg to xxinsertw intrinsic must be constant integer");
17726 const int64_t MaxIndex = 12;
17727 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17728
17729 // The builtin semantics don't exactly match the xxinsertw instruction's
17730 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17731 // word from the first argument, and inserts it into the second argument. The
17732 // instruction extracts the word from its second input register and inserts
17733 // it into its first input register, so swap the first and second arguments.
17734 std::swap(Op0, Op1);
17735
17736 // Need to cast the second argument from a vector of unsigned int to a
17737 // vector of long long.
17738 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17739
17740 if (getTarget().isLittleEndian()) {
17741 // Reverse the double words in the vector we will extract from.
17742 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17743 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17744
17745 // Reverse the index.
17746 Index = MaxIndex - Index;
17747 }
17748
17749 // Intrinsic expects the first arg to be a vector of int.
17750 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17751 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17752 return Builder.CreateCall(F, {Op0, Op1, Op2});
17753 }
17754
17755 case PPC::BI__builtin_vsx_extractuword: {
17756 Value *Op0 = EmitScalarExpr(E->getArg(0));
17757 Value *Op1 = EmitScalarExpr(E->getArg(1));
17758 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17759
17760 // Intrinsic expects the first argument to be a vector of doublewords.
17761 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17762
17763 // The second argument is a compile time constant int that needs to
17764 // be clamped to the range [0, 12].
17765 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17766 assert(ArgCI &&
17767 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17768 const int64_t MaxIndex = 12;
17769 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17770
17771 if (getTarget().isLittleEndian()) {
17772 // Reverse the index.
17773 Index = MaxIndex - Index;
17774 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17775
17776 // Emit the call, then reverse the double words of the results vector.
17777 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17778
17779 Value *ShuffleCall =
17780 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17781 return ShuffleCall;
17782 } else {
17783 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17784 return Builder.CreateCall(F, {Op0, Op1});
17785 }
17786 }
17787
17788 case PPC::BI__builtin_vsx_xxpermdi: {
17789 Value *Op0 = EmitScalarExpr(E->getArg(0));
17790 Value *Op1 = EmitScalarExpr(E->getArg(1));
17791 Value *Op2 = EmitScalarExpr(E->getArg(2));
17792 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17793 assert(ArgCI && "Third arg must be constant integer!");
17794
17795 unsigned Index = ArgCI->getZExtValue();
17796 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17797 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17798
17799 // Account for endianness by treating this as just a shuffle. So we use the
17800 // same indices for both LE and BE in order to produce expected results in
17801 // both cases.
17802 int ElemIdx0 = (Index & 2) >> 1;
17803 int ElemIdx1 = 2 + (Index & 1);
17804
17805 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17806 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17807 QualType BIRetType = E->getType();
17808 auto RetTy = ConvertType(BIRetType);
17809 return Builder.CreateBitCast(ShuffleCall, RetTy);
17810 }
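// Worked example: for Index == 3, ElemIdx0 = (3 & 2) >> 1 = 1 and
// ElemIdx1 = 2 + (3 & 1) = 3, so the shuffle selects doubleword 1 of Op0 and
// doubleword 1 of Op1 from the concatenated <4 x i64> input, independent of
// endianness.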
17811
17812 case PPC::BI__builtin_vsx_xxsldwi: {
17813 Value *Op0 = EmitScalarExpr(E->getArg(0));
17814 Value *Op1 = EmitScalarExpr(E->getArg(1));
17815 Value *Op2 = EmitScalarExpr(E->getArg(2));
17816 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17817 assert(ArgCI && "Third argument must be a compile time constant");
17818 unsigned Index = ArgCI->getZExtValue() & 0x3;
17819 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17820 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17821
17822 // Create a shuffle mask
17823 int ElemIdx0;
17824 int ElemIdx1;
17825 int ElemIdx2;
17826 int ElemIdx3;
17827 if (getTarget().isLittleEndian()) {
17828 // Little endian element N comes from element 8+N-Index of the
17829 // concatenated wide vector (of course, using modulo arithmetic on
17830 // the total number of elements).
17831 ElemIdx0 = (8 - Index) % 8;
17832 ElemIdx1 = (9 - Index) % 8;
17833 ElemIdx2 = (10 - Index) % 8;
17834 ElemIdx3 = (11 - Index) % 8;
17835 } else {
17836 // Big endian ElemIdx<N> = Index + N
17837 ElemIdx0 = Index;
17838 ElemIdx1 = Index + 1;
17839 ElemIdx2 = Index + 2;
17840 ElemIdx3 = Index + 3;
17841 }
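// For illustration (a sketch, not from the original comments): with Index == 1
// on a big-endian target the mask is {1, 2, 3, 4}, i.e. the concatenation of
// Op0 and Op1 shifted left by one word, which is what xxsldwi computes; the
// little-endian mask produces the equivalent result for the reversed layout.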
17842
17843 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17844 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17845 QualType BIRetType = E->getType();
17846 auto RetTy = ConvertType(BIRetType);
17847 return Builder.CreateBitCast(ShuffleCall, RetTy);
17848 }
17849
17850 case PPC::BI__builtin_pack_vector_int128: {
17851 Value *Op0 = EmitScalarExpr(E->getArg(0));
17852 Value *Op1 = EmitScalarExpr(E->getArg(1));
17853 bool isLittleEndian = getTarget().isLittleEndian();
17854 Value *PoisonValue =
17855 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17856 Value *Res = Builder.CreateInsertElement(
17857 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17858 Res = Builder.CreateInsertElement(Res, Op1,
17859 (uint64_t)(isLittleEndian ? 0 : 1));
17860 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17861 }
17862
17863 case PPC::BI__builtin_unpack_vector_int128: {
17864 Value *Op0 = EmitScalarExpr(E->getArg(0));
17865 Value *Op1 = EmitScalarExpr(E->getArg(1));
17866 ConstantInt *Index = cast<ConstantInt>(Op1);
17867 Value *Unpacked = Builder.CreateBitCast(
17868 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17869
17870 if (getTarget().isLittleEndian())
17871 Index =
17872 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17873
17874 return Builder.CreateExtractElement(Unpacked, Index);
17875 }
17876
17877 case PPC::BI__builtin_ppc_sthcx: {
17878 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17879 Value *Op0 = EmitScalarExpr(E->getArg(0));
17880 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17881 return Builder.CreateCall(F, {Op0, Op1});
17882 }
17883
17884 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17885 // Some of the MMA instructions accumulate their result into an existing
17886 // accumulator whereas the others generate a new accumulator. So we need to
17887 // use custom code generation to expand a builtin call with a pointer to a
17888 // load (if the corresponding instruction accumulates its result) followed by
17889 // the call to the intrinsic and a store of the result.
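// Rough sketch of the expansion for an accumulating builtin (illustrative
// only; the particular builtin is just an example):
//   __builtin_mma_xvf32gerpp(&acc, a, b)
// becomes, conceptually:
//   tmp = load *(&acc);                        // load existing accumulator
//   tmp = llvm.ppc.mma.xvf32gerpp(tmp, a, b);  // call the intrinsic
//   store tmp, *(&acc);                        // write the result back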
17890#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17891 case PPC::BI__builtin_##Name:
17892#include "clang/Basic/BuiltinsPPC.def"
17893 {
17894 SmallVector<Value *, 4> Ops;
17895 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17896 if (E->getArg(i)->getType()->isArrayType())
17897 Ops.push_back(
17898 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
17899 else
17900 Ops.push_back(EmitScalarExpr(E->getArg(i)));
17901 // The first argument of these builtins is a pointer used to store their
17902 // result. However, the LLVM intrinsics return their result in multiple
17903 // return values. So, here we emit code extracting these values from the
17904 // intrinsic results and storing them using that pointer.
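// For example, __builtin_mma_disassemble_acc(res, &acc) loads the accumulator,
// calls llvm.ppc.mma.disassemble.acc, and stores the four returned 16-byte
// vectors to res[0..3] (illustrative summary of the code below).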
17905 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17906 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17907 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17908 unsigned NumVecs = 2;
17909 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17910 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17911 NumVecs = 4;
17912 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17913 }
17914 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17915 Address Addr = EmitPointerWithAlignment(E->getArg(1));
17916 Value *Vec = Builder.CreateLoad(Addr);
17917 Value *Call = Builder.CreateCall(F, {Vec});
17918 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17919 Value *Ptr = Ops[0];
17920 for (unsigned i=0; i<NumVecs; i++) {
17921 Value *Vec = Builder.CreateExtractValue(Call, i);
17922 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17923 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17924 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17925 }
17926 return Call;
17927 }
17928 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17929 BuiltinID == PPC::BI__builtin_mma_build_acc) {
17930 // Reverse the order of the operands for LE, so the
17931 // same builtin call can be used on both LE and BE
17932 // without the need for the programmer to swap operands.
17933 // The operands are reversed starting from the second argument; the
17934 // first operand is the pointer to the pair/accumulator that is being
17935 // built.
17936 if (getTarget().isLittleEndian())
17937 std::reverse(Ops.begin() + 1, Ops.end());
17938 }
17939 bool Accumulate;
17940 switch (BuiltinID) {
17941 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17942 case PPC::BI__builtin_##Name: \
17943 ID = Intrinsic::ppc_##Intr; \
17944 Accumulate = Acc; \
17945 break;
17946 #include "clang/Basic/BuiltinsPPC.def"
17947 }
17948 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17949 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17950 BuiltinID == PPC::BI__builtin_mma_lxvp ||
17951 BuiltinID == PPC::BI__builtin_mma_stxvp) {
17952 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17953 BuiltinID == PPC::BI__builtin_mma_lxvp) {
17954 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17955 } else {
17956 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17957 }
17958 Ops.pop_back();
17959 llvm::Function *F = CGM.getIntrinsic(ID);
17960 return Builder.CreateCall(F, Ops, "");
17961 }
17962 SmallVector<Value*, 4> CallOps;
17963 if (Accumulate) {
17964 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17965 Value *Acc = Builder.CreateLoad(Addr);
17966 CallOps.push_back(Acc);
17967 }
17968 for (unsigned i=1; i<Ops.size(); i++)
17969 CallOps.push_back(Ops[i]);
17970 llvm::Function *F = CGM.getIntrinsic(ID);
17971 Value *Call = Builder.CreateCall(F, CallOps);
17972 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17973 }
17974
17975 case PPC::BI__builtin_ppc_compare_and_swap:
17976 case PPC::BI__builtin_ppc_compare_and_swaplp: {
17977 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17978 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17979 Value *OldVal = Builder.CreateLoad(OldValAddr);
17980 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17981 LValue LV = MakeAddrLValue(Addr, AtomicTy);
17982 Value *Op2 = EmitScalarExpr(E->getArg(2));
17983 auto Pair = EmitAtomicCompareExchange(
17984 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17985 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17986 // Unlike C11's atomic_compare_exchange, according to
17987 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17988 // > In either case, the contents of the memory location specified by addr
17989 // > are copied into the memory location specified by old_val_addr.
17990 // However, it does not specify whether the store to OldValAddr is atomic or
17991 // which ordering to use. For now, following XL's codegen, treat it as a
17992 // normal store.
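// In effect (summary, not normative): the builtin performs a relaxed
// (monotonic) compare-and-exchange, unconditionally copies the value read from
// addr into old_val_addr, and returns the success flag zero-extended to i32.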
17993 Value *LoadedVal = Pair.first.getScalarVal();
17994 Builder.CreateStore(LoadedVal, OldValAddr);
17995 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17996 }
17997 case PPC::BI__builtin_ppc_fetch_and_add:
17998 case PPC::BI__builtin_ppc_fetch_and_addlp: {
17999 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18000 llvm::AtomicOrdering::Monotonic);
18001 }
18002 case PPC::BI__builtin_ppc_fetch_and_and:
18003 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18004 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18005 llvm::AtomicOrdering::Monotonic);
18006 }
18007
18008 case PPC::BI__builtin_ppc_fetch_and_or:
18009 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18010 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18011 llvm::AtomicOrdering::Monotonic);
18012 }
18013 case PPC::BI__builtin_ppc_fetch_and_swap:
18014 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18015 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18016 llvm::AtomicOrdering::Monotonic);
18017 }
18018 case PPC::BI__builtin_ppc_ldarx:
18019 case PPC::BI__builtin_ppc_lwarx:
18020 case PPC::BI__builtin_ppc_lharx:
18021 case PPC::BI__builtin_ppc_lbarx:
18022 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18023 case PPC::BI__builtin_ppc_mfspr: {
18024 Value *Op0 = EmitScalarExpr(E->getArg(0));
18025 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18026 ? Int32Ty
18027 : Int64Ty;
18028 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18029 return Builder.CreateCall(F, {Op0});
18030 }
18031 case PPC::BI__builtin_ppc_mtspr: {
18032 Value *Op0 = EmitScalarExpr(E->getArg(0));
18033 Value *Op1 = EmitScalarExpr(E->getArg(1));
18034 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18035 ? Int32Ty
18036 : Int64Ty;
18037 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18038 return Builder.CreateCall(F, {Op0, Op1});
18039 }
18040 case PPC::BI__builtin_ppc_popcntb: {
18041 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18042 llvm::Type *ArgType = ArgValue->getType();
18043 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18044 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18045 }
18046 case PPC::BI__builtin_ppc_mtfsf: {
18047 // The builtin takes a uint32 that needs to be cast to an
18048 // f64 to be passed to the intrinsic.
18049 Value *Op0 = EmitScalarExpr(E->getArg(0));
18050 Value *Op1 = EmitScalarExpr(E->getArg(1));
18051 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18052 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18053 return Builder.CreateCall(F, {Op0, Cast}, "");
18054 }
18055
18056 case PPC::BI__builtin_ppc_swdiv_nochk:
18057 case PPC::BI__builtin_ppc_swdivs_nochk: {
18058 Value *Op0 = EmitScalarExpr(E->getArg(0));
18059 Value *Op1 = EmitScalarExpr(E->getArg(1));
18060 FastMathFlags FMF = Builder.getFastMathFlags();
18061 Builder.getFastMathFlags().setFast();
18062 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18063 Builder.getFastMathFlags() &= (FMF);
18064 return FDiv;
18065 }
18066 case PPC::BI__builtin_ppc_fric:
18067 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18068 *this, E, Intrinsic::rint,
18069 Intrinsic::experimental_constrained_rint))
18070 .getScalarVal();
18071 case PPC::BI__builtin_ppc_frim:
18072 case PPC::BI__builtin_ppc_frims:
18073 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18074 *this, E, Intrinsic::floor,
18075 Intrinsic::experimental_constrained_floor))
18076 .getScalarVal();
18077 case PPC::BI__builtin_ppc_frin:
18078 case PPC::BI__builtin_ppc_frins:
18079 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18080 *this, E, Intrinsic::round,
18081 Intrinsic::experimental_constrained_round))
18082 .getScalarVal();
18083 case PPC::BI__builtin_ppc_frip:
18084 case PPC::BI__builtin_ppc_frips:
18085 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18086 *this, E, Intrinsic::ceil,
18087 Intrinsic::experimental_constrained_ceil))
18088 .getScalarVal();
18089 case PPC::BI__builtin_ppc_friz:
18090 case PPC::BI__builtin_ppc_frizs:
18091 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18092 *this, E, Intrinsic::trunc,
18093 Intrinsic::experimental_constrained_trunc))
18094 .getScalarVal();
18095 case PPC::BI__builtin_ppc_fsqrt:
18096 case PPC::BI__builtin_ppc_fsqrts:
18097 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18098 *this, E, Intrinsic::sqrt,
18099 Intrinsic::experimental_constrained_sqrt))
18100 .getScalarVal();
18101 case PPC::BI__builtin_ppc_test_data_class: {
18102 Value *Op0 = EmitScalarExpr(E->getArg(0));
18103 Value *Op1 = EmitScalarExpr(E->getArg(1));
18104 return Builder.CreateCall(
18105 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18106 {Op0, Op1}, "test_data_class");
18107 }
18108 case PPC::BI__builtin_ppc_maxfe: {
18109 Value *Op0 = EmitScalarExpr(E->getArg(0));
18110 Value *Op1 = EmitScalarExpr(E->getArg(1));
18111 Value *Op2 = EmitScalarExpr(E->getArg(2));
18112 Value *Op3 = EmitScalarExpr(E->getArg(3));
18113 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18114 {Op0, Op1, Op2, Op3});
18115 }
18116 case PPC::BI__builtin_ppc_maxfl: {
18117 Value *Op0 = EmitScalarExpr(E->getArg(0));
18118 Value *Op1 = EmitScalarExpr(E->getArg(1));
18119 Value *Op2 = EmitScalarExpr(E->getArg(2));
18120 Value *Op3 = EmitScalarExpr(E->getArg(3));
18121 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18122 {Op0, Op1, Op2, Op3});
18123 }
18124 case PPC::BI__builtin_ppc_maxfs: {
18125 Value *Op0 = EmitScalarExpr(E->getArg(0));
18126 Value *Op1 = EmitScalarExpr(E->getArg(1));
18127 Value *Op2 = EmitScalarExpr(E->getArg(2));
18128 Value *Op3 = EmitScalarExpr(E->getArg(3));
18129 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18130 {Op0, Op1, Op2, Op3});
18131 }
18132 case PPC::BI__builtin_ppc_minfe: {
18133 Value *Op0 = EmitScalarExpr(E->getArg(0));
18134 Value *Op1 = EmitScalarExpr(E->getArg(1));
18135 Value *Op2 = EmitScalarExpr(E->getArg(2));
18136 Value *Op3 = EmitScalarExpr(E->getArg(3));
18137 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18138 {Op0, Op1, Op2, Op3});
18139 }
18140 case PPC::BI__builtin_ppc_minfl: {
18141 Value *Op0 = EmitScalarExpr(E->getArg(0));
18142 Value *Op1 = EmitScalarExpr(E->getArg(1));
18143 Value *Op2 = EmitScalarExpr(E->getArg(2));
18144 Value *Op3 = EmitScalarExpr(E->getArg(3));
18145 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18146 {Op0, Op1, Op2, Op3});
18147 }
18148 case PPC::BI__builtin_ppc_minfs: {
18149 Value *Op0 = EmitScalarExpr(E->getArg(0));
18150 Value *Op1 = EmitScalarExpr(E->getArg(1));
18151 Value *Op2 = EmitScalarExpr(E->getArg(2));
18152 Value *Op3 = EmitScalarExpr(E->getArg(3));
18153 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18154 {Op0, Op1, Op2, Op3});
18155 }
18156 case PPC::BI__builtin_ppc_swdiv:
18157 case PPC::BI__builtin_ppc_swdivs: {
18158 Value *Op0 = EmitScalarExpr(E->getArg(0));
18159 Value *Op1 = EmitScalarExpr(E->getArg(1));
18160 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18161 }
18162 case PPC::BI__builtin_ppc_set_fpscr_rn:
18163 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18164 {EmitScalarExpr(E->getArg(0))});
18165 case PPC::BI__builtin_ppc_mffs:
18166 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18167 }
18168}
18169
18170namespace {
18171 // If \p E is not a null pointer, insert an address space cast to match the
18172 // return type of \p E if necessary.
18173Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18174 const CallExpr *E = nullptr) {
18175 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18176 auto *Call = CGF.Builder.CreateCall(F);
18177 Call->addRetAttr(
18178 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18179 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18180 if (!E)
18181 return Call;
18182 QualType BuiltinRetType = E->getType();
18183 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18184 if (RetTy == Call->getType())
18185 return Call;
18186 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18187}
18188
18189Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18190 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18191 auto *Call = CGF.Builder.CreateCall(F);
18192 Call->addRetAttr(
18193 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18194 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18195 return Call;
18196}
18197
18198 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18199/// Emit code based on Code Object ABI version.
18200/// COV_4 : Emit code to use dispatch ptr
18201/// COV_5+ : Emit code to use implicitarg ptr
18202/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18203 /// and use its value to select the COV_4 or COV_5+ approach. It is used for
18204/// compiling device libraries in an ABI-agnostic way.
18205///
18206 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18207/// clang during compilation of user code.
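// Concretely (summary of the code below): for COV_5+ the 16-bit work-group
// size of dimension \p Index is loaded from byte offset 12 + Index * 2 of the
// implicit kernarg segment, while for COV_4 it is loaded from byte offset
// 4 + Index * 2 of the HSA kernel_dispatch_packet.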
18208Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18209 llvm::LoadInst *LD;
18210
18211 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18212
18213 if (Cov == CodeObjectVersionKind::COV_None) {
18214 StringRef Name = "__oclc_ABI_version";
18215 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18216 if (!ABIVersionC)
18217 ABIVersionC = new llvm::GlobalVariable(
18218 CGF.CGM.getModule(), CGF.Int32Ty, false,
18219 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18220 llvm::GlobalVariable::NotThreadLocal,
18221 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18222
18223 // This load will be eliminated by the IPSCCP because it is constant
18224 // weak_odr without externally_initialized. Either changing it to weak or
18225 // adding externally_initialized will keep the load.
18226 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18227 CGF.CGM.getIntAlign());
18228
18229 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18230 ABIVersion,
18231 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18232
18233 // Indexing the implicit kernarg segment.
18234 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18235 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18236
18237 // Indexing the HSA kernel_dispatch_packet struct.
18238 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18239 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18240
18241 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18242 LD = CGF.Builder.CreateLoad(
18243 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18244 } else {
18245 Value *GEP = nullptr;
18246 if (Cov >= CodeObjectVersionKind::COV_5) {
18247 // Indexing the implicit kernarg segment.
18248 GEP = CGF.Builder.CreateConstGEP1_32(
18249 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18250 } else {
18251 // Indexing the HSA kernel_dispatch_packet struct.
18252 GEP = CGF.Builder.CreateConstGEP1_32(
18253 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18254 }
18255 LD = CGF.Builder.CreateLoad(
18256 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18257 }
18258
18259 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18260 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18261 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18262 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18263 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18264 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18265 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18266 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18267 return LD;
18268}
18269
18270 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18271Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18272 const unsigned XOffset = 12;
18273 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18274 // Indexing the HSA kernel_dispatch_packet struct.
18275 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18276 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18277 auto *LD = CGF.Builder.CreateLoad(
18278 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18279 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18280 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18281 return LD;
18282}
18283} // namespace
18284
18285// For processing memory ordering and memory scope arguments of various
18286// amdgcn builtins.
18287 // \p Order takes a C++11-compatible memory-ordering specifier and converts
18288 // it into LLVM's memory ordering specifier using the atomic C ABI, writing
18289 // the result to \p AO. \p Scope takes a const char * and converts it into an
18290 // AMDGCN-specific SyncScopeID, writing it to \p SSID.
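// For example (illustrative): Order == __ATOMIC_SEQ_CST maps to
// llvm::AtomicOrdering::SequentiallyConsistent, and a Scope string of
// "workgroup" is turned into the "workgroup" sync scope ID.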
18291 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18292 llvm::AtomicOrdering &AO,
18293 llvm::SyncScope::ID &SSID) {
18294 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18295
18296 // Map C11/C++11 memory ordering to LLVM memory ordering
18297 assert(llvm::isValidAtomicOrderingCABI(ord));
18298 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18299 case llvm::AtomicOrderingCABI::acquire:
18300 case llvm::AtomicOrderingCABI::consume:
18301 AO = llvm::AtomicOrdering::Acquire;
18302 break;
18303 case llvm::AtomicOrderingCABI::release:
18304 AO = llvm::AtomicOrdering::Release;
18305 break;
18306 case llvm::AtomicOrderingCABI::acq_rel:
18307 AO = llvm::AtomicOrdering::AcquireRelease;
18308 break;
18309 case llvm::AtomicOrderingCABI::seq_cst:
18310 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18311 break;
18312 case llvm::AtomicOrderingCABI::relaxed:
18313 AO = llvm::AtomicOrdering::Monotonic;
18314 break;
18315 }
18316
18317 // Some of the atomic builtins take the scope as a string name.
18318 StringRef scp;
18319 if (llvm::getConstantStringInfo(Scope, scp)) {
18320 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18321 return;
18322 }
18323
18324 // Older builtins had an enum argument for the memory scope.
18325 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
18326 switch (scope) {
18327 case 0: // __MEMORY_SCOPE_SYSTEM
18328 SSID = llvm::SyncScope::System;
18329 break;
18330 case 1: // __MEMORY_SCOPE_DEVICE
18331 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
18332 break;
18333 case 2: // __MEMORY_SCOPE_WRKGRP
18334 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
18335 break;
18336 case 3: // __MEMORY_SCOPE_WVFRNT
18337 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
18338 break;
18339 case 4: // __MEMORY_SCOPE_SINGLE
18340 SSID = llvm::SyncScope::SingleThread;
18341 break;
18342 default:
18343 SSID = llvm::SyncScope::System;
18344 break;
18345 }
18346}
18347
18348llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18349 unsigned Idx,
18350 const CallExpr *E) {
18351 llvm::Value *Arg = nullptr;
18352 if ((ICEArguments & (1 << Idx)) == 0) {
18353 Arg = EmitScalarExpr(E->getArg(Idx));
18354 } else {
18355 // If this is required to be a constant, constant fold it so that we
18356 // know that the generated intrinsic gets a ConstantInt.
18357 std::optional<llvm::APSInt> Result =
18358 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18359 assert(Result && "Expected argument to be a constant");
18360 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18361 }
18362 return Arg;
18363}
18364
18365Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18366 if (QT->hasFloatingRepresentation()) {
18367 switch (elementCount) {
18368 case 2:
18369 return Intrinsic::dx_dot2;
18370 case 3:
18371 return Intrinsic::dx_dot3;
18372 case 4:
18373 return Intrinsic::dx_dot4;
18374 }
18375 }
18376 if (QT->hasSignedIntegerRepresentation())
18377 return Intrinsic::dx_sdot;
18378
18379 assert(QT->hasUnsignedIntegerRepresentation());
18380 return Intrinsic::dx_udot;
18381}
18382
18383 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18384 const CallExpr *E) {
18385 if (!getLangOpts().HLSL)
18386 return nullptr;
18387
18388 switch (BuiltinID) {
18389 case Builtin::BI__builtin_hlsl_elementwise_all: {
18390 Value *Op0 = EmitScalarExpr(E->getArg(0));
18391 return Builder.CreateIntrinsic(
18392 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18393 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18394 "hlsl.all");
18395 }
18396 case Builtin::BI__builtin_hlsl_elementwise_any: {
18397 Value *Op0 = EmitScalarExpr(E->getArg(0));
18398 return Builder.CreateIntrinsic(
18399 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18400 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18401 "hlsl.any");
18402 }
18403 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18404 Value *OpX = EmitScalarExpr(E->getArg(0));
18405 Value *OpMin = EmitScalarExpr(E->getArg(1));
18406 Value *OpMax = EmitScalarExpr(E->getArg(2));
18407
18408 QualType Ty = E->getArg(0)->getType();
18409 bool IsUnsigned = false;
18410 if (auto *VecTy = Ty->getAs<VectorType>())
18411 Ty = VecTy->getElementType();
18412 IsUnsigned = Ty->isUnsignedIntegerType();
18413 return Builder.CreateIntrinsic(
18414 /*ReturnType=*/OpX->getType(),
18415 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18416 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18417 }
18418 case Builtin::BI__builtin_hlsl_dot: {
18419 Value *Op0 = EmitScalarExpr(E->getArg(0));
18420 Value *Op1 = EmitScalarExpr(E->getArg(1));
18421 llvm::Type *T0 = Op0->getType();
18422 llvm::Type *T1 = Op1->getType();
18423 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18424 if (T0->isFloatingPointTy())
18425 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18426
18427 if (T0->isIntegerTy())
18428 return Builder.CreateMul(Op0, Op1, "dx.dot");
18429
18430 // Bools should have been promoted
18431 llvm_unreachable(
18432 "Scalar dot product is only supported on ints and floats.");
18433 }
18434 // A VectorSplat should have happened
18435 assert(T0->isVectorTy() && T1->isVectorTy() &&
18436 "Dot product of vector and scalar is not supported.");
18437
18438 // A vector sext or sitofp should have happened
18439 assert(T0->getScalarType() == T1->getScalarType() &&
18440 "Dot product of vectors need the same element types.");
18441
18442 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18443 [[maybe_unused]] auto *VecTy1 =
18444 E->getArg(1)->getType()->getAs<VectorType>();
18445 // An HLSLVectorTruncation should have happened
18446 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18447 "Dot product requires vectors to be of the same size.");
18448
18449 return Builder.CreateIntrinsic(
18450 /*ReturnType=*/T0->getScalarType(),
18451 getDotProductIntrinsic(E->getArg(0)->getType(),
18452 VecTy0->getNumElements()),
18453 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18454 } break;
18455 case Builtin::BI__builtin_hlsl_lerp: {
18456 Value *X = EmitScalarExpr(E->getArg(0));
18457 Value *Y = EmitScalarExpr(E->getArg(1));
18458 Value *S = EmitScalarExpr(E->getArg(2));
18459 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18460 llvm_unreachable("lerp operand must have a float representation");
18461 return Builder.CreateIntrinsic(
18462 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18463 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18464 }
18465 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18466 Value *Op0 = EmitScalarExpr(E->getArg(0));
18467 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18468 llvm_unreachable("frac operand must have a float representation");
18469 return Builder.CreateIntrinsic(
18470 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
18471 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
18472}
18473case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18474 Value *Op0 = EmitScalarExpr(E->getArg(0));
18475 llvm::Type *Xty = Op0->getType();
18476 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18477 if (Xty->isVectorTy()) {
18478 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18479 retType = llvm::VectorType::get(
18480 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18481 }
18482 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18483 llvm_unreachable("isinf operand must have a float representation");
18484 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18485 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18486 }
18487 case Builtin::BI__builtin_hlsl_mad: {
18488 Value *M = EmitScalarExpr(E->getArg(0));
18489 Value *A = EmitScalarExpr(E->getArg(1));
18490 Value *B = EmitScalarExpr(E->getArg(2));
18491 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18492 return Builder.CreateIntrinsic(
18493 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18494 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18495
18496 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18497 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18498 return Builder.CreateIntrinsic(
18499 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18500 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18501
18502 Value *Mul = Builder.CreateNSWMul(M, A);
18503 return Builder.CreateNSWAdd(Mul, B);
18504 }
18505 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18506 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18507 return Builder.CreateIntrinsic(
18508 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18509 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18510
18511 Value *Mul = Builder.CreateNUWMul(M, A);
18512 return Builder.CreateNUWAdd(Mul, B);
18513 }
18514 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18515 Value *Op0 = EmitScalarExpr(E->getArg(0));
18516 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18517 llvm_unreachable("rcp operand must have a float representation");
18518 llvm::Type *Ty = Op0->getType();
18519 llvm::Type *EltTy = Ty->getScalarType();
18520 Constant *One = Ty->isVectorTy()
18521 ? ConstantVector::getSplat(
18522 ElementCount::getFixed(
18523 cast<FixedVectorType>(Ty)->getNumElements()),
18524 ConstantFP::get(EltTy, 1.0))
18525 : ConstantFP::get(EltTy, 1.0);
18526 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18527 }
18528 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18529 Value *Op0 = EmitScalarExpr(E->getArg(0));
18530 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18531 llvm_unreachable("rsqrt operand must have a float representation");
18532 return Builder.CreateIntrinsic(
18533 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
18534 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
18535 }
18536 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18537 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
18538 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18539 {}, false, true));
18540 }
18541 }
18542 return nullptr;
18543}
18544
18545void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
18546 const CallExpr *E) {
18547 constexpr const char *Tag = "amdgpu-as";
18548
18549 LLVMContext &Ctx = Inst->getContext();
18550 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
18551 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
18552 llvm::Value *V = EmitScalarExpr(E->getArg(K));
18553 StringRef AS;
18554 if (llvm::getConstantStringInfo(V, AS)) {
18555 MMRAs.push_back({Tag, AS});
18556 // TODO: Delete the resulting unused constant?
18557 continue;
18558 }
18559 CGM.Error(E->getExprLoc(),
18560 "expected an address space name as a string literal");
18561 }
18562
18563 llvm::sort(MMRAs);
18564 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
18565 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
18566}
18567
18568 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18569 const CallExpr *E) {
18570 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18571 llvm::SyncScope::ID SSID;
18572 switch (BuiltinID) {
18573 case AMDGPU::BI__builtin_amdgcn_div_scale:
18574 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18575 // Translate from the intrinsic's struct return to the builtin's out
18576 // argument.
18577
18578 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18579
18580 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18581 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18582 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18583
18584 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18585 X->getType());
18586
18587 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18588
18589 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18590 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18591
18592 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18593
18594 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18595 Builder.CreateStore(FlagExt, FlagOutPtr);
18596 return Result;
18597 }
18598 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18599 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18600 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18601 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18602 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18603 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18604
18605 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18606 Src0->getType());
18607 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18608 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18609 }
18610
18611 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18612 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18613 Intrinsic::amdgcn_ds_swizzle);
18614 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18615 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18616 Intrinsic::amdgcn_mov_dpp8);
18617 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18618 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18619 llvm::SmallVector<llvm::Value *, 6> Args;
18620 // Find out if any arguments are required to be integer constant
18621 // expressions.
18622 unsigned ICEArguments = 0;
18623 ASTContext::GetBuiltinTypeError Error;
18624 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18625 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18626 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18627 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18628 }
18629 assert(Args.size() == 5 || Args.size() == 6);
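// Note: __builtin_amdgcn_mov_dpp has no 'old' operand (five arguments), so a
// poison 'old' value is prepended below to match the six-operand
// llvm.amdgcn.update.dpp intrinsic.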
18630 if (Args.size() == 5)
18631 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18632 Function *F =
18633 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18634 return Builder.CreateCall(F, Args);
18635 }
18636 case AMDGPU::BI__builtin_amdgcn_permlane16:
18637 case AMDGPU::BI__builtin_amdgcn_permlanex16:
18638 return emitBuiltinWithOneOverloadedType<6>(
18639 *this, E,
18640 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
18641 ? Intrinsic::amdgcn_permlane16
18642 : Intrinsic::amdgcn_permlanex16);
18643 case AMDGPU::BI__builtin_amdgcn_permlane64:
18644 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18645 Intrinsic::amdgcn_permlane64);
18646 case AMDGPU::BI__builtin_amdgcn_readlane:
18647 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18648 Intrinsic::amdgcn_readlane);
18649 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
18650 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18651 Intrinsic::amdgcn_readfirstlane);
18652 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18653 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18654 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18655 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18656 Intrinsic::amdgcn_div_fixup);
18657 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18658 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18659 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18660 case AMDGPU::BI__builtin_amdgcn_rcp:
18661 case AMDGPU::BI__builtin_amdgcn_rcpf:
18662 case AMDGPU::BI__builtin_amdgcn_rcph:
18663 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
18664 case AMDGPU::BI__builtin_amdgcn_sqrt:
18665 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18666 case AMDGPU::BI__builtin_amdgcn_sqrth:
18667 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18668 Intrinsic::amdgcn_sqrt);
18669 case AMDGPU::BI__builtin_amdgcn_rsq:
18670 case AMDGPU::BI__builtin_amdgcn_rsqf:
18671 case AMDGPU::BI__builtin_amdgcn_rsqh:
18672 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
18673 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18674 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18675 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18676 Intrinsic::amdgcn_rsq_clamp);
18677 case AMDGPU::BI__builtin_amdgcn_sinf:
18678 case AMDGPU::BI__builtin_amdgcn_sinh:
18679 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
18680 case AMDGPU::BI__builtin_amdgcn_cosf:
18681 case AMDGPU::BI__builtin_amdgcn_cosh:
18682 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
18683 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18684 return EmitAMDGPUDispatchPtr(*this, E);
18685 case AMDGPU::BI__builtin_amdgcn_logf:
18686 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
18687 case AMDGPU::BI__builtin_amdgcn_exp2f:
18688 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18689 Intrinsic::amdgcn_exp2);
18690 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18691 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18692 Intrinsic::amdgcn_log_clamp);
18693 case AMDGPU::BI__builtin_amdgcn_ldexp:
18694 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18695 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18696 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18697 llvm::Function *F =
18698 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18699 return Builder.CreateCall(F, {Src0, Src1});
18700 }
18701 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18702 // The raw instruction has different behavior for out-of-bounds exponent
18703 // values (implicit truncation instead of saturating to short_min/short_max).
18704 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18705 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18706 llvm::Function *F =
18707 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18708 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18709 }
18710 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18711 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18712 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18713 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18714 Intrinsic::amdgcn_frexp_mant);
18715 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18716 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18717 Value *Src0 = EmitScalarExpr(E->getArg(0));
18718 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18719 { Builder.getInt32Ty(), Src0->getType() });
18720 return Builder.CreateCall(F, Src0);
18721 }
18722 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18723 Value *Src0 = EmitScalarExpr(E->getArg(0));
18724 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18725 { Builder.getInt16Ty(), Src0->getType() });
18726 return Builder.CreateCall(F, Src0);
18727 }
18728 case AMDGPU::BI__builtin_amdgcn_fract:
18729 case AMDGPU::BI__builtin_amdgcn_fractf:
18730 case AMDGPU::BI__builtin_amdgcn_fracth:
18731 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18732 Intrinsic::amdgcn_fract);
18733 case AMDGPU::BI__builtin_amdgcn_lerp:
18734 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18735 Intrinsic::amdgcn_lerp);
18736 case AMDGPU::BI__builtin_amdgcn_ubfe:
18737 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18738 Intrinsic::amdgcn_ubfe);
18739 case AMDGPU::BI__builtin_amdgcn_sbfe:
18740 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18741 Intrinsic::amdgcn_sbfe);
18742 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18743 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18744 llvm::Type *ResultType = ConvertType(E->getType());
18745 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18746 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18747 return Builder.CreateCall(F, { Src });
18748 }
18749 case AMDGPU::BI__builtin_amdgcn_uicmp:
18750 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18751 case AMDGPU::BI__builtin_amdgcn_sicmp:
18752 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18753 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18754 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18755 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18756
18757 // FIXME-GFX10: How should 32 bit mask be handled?
18758 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18759 { Builder.getInt64Ty(), Src0->getType() });
18760 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18761 }
18762 case AMDGPU::BI__builtin_amdgcn_fcmp:
18763 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18764 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18765 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18766 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18767
18768 // FIXME-GFX10: How should 32 bit mask be handled?
18769 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18770 { Builder.getInt64Ty(), Src0->getType() });
18771 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18772 }
18773 case AMDGPU::BI__builtin_amdgcn_class:
18774 case AMDGPU::BI__builtin_amdgcn_classf:
18775 case AMDGPU::BI__builtin_amdgcn_classh:
18776 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18777 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18778 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18779 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18780 Intrinsic::amdgcn_fmed3);
18781 case AMDGPU::BI__builtin_amdgcn_ds_append:
18782 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18783 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18784 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18785 Value *Src0 = EmitScalarExpr(E->getArg(0));
18786 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18787 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18788 }
18789 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18790 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18791 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18792 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18793 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18794 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18795 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18796 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18797 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18798 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18799 Intrinsic::ID IID;
18800 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18801 switch (BuiltinID) {
18802 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18803 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18804 IID = Intrinsic::amdgcn_global_atomic_fadd;
18805 break;
18806 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18807 ArgTy = llvm::FixedVectorType::get(
18808 llvm::Type::getHalfTy(getLLVMContext()), 2);
18809 IID = Intrinsic::amdgcn_global_atomic_fadd;
18810 break;
18811 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18812 IID = Intrinsic::amdgcn_global_atomic_fadd;
18813 break;
18814 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18815 IID = Intrinsic::amdgcn_global_atomic_fmin;
18816 break;
18817 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18818 IID = Intrinsic::amdgcn_global_atomic_fmax;
18819 break;
18820 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18821 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18822 break;
18823 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18824 IID = Intrinsic::amdgcn_flat_atomic_fmin;
18825 break;
18826 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18827 IID = Intrinsic::amdgcn_flat_atomic_fmax;
18828 break;
18829 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18830 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18831 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18832 break;
18833 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18834 ArgTy = llvm::FixedVectorType::get(
18835 llvm::Type::getHalfTy(getLLVMContext()), 2);
18836 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18837 break;
18838 }
18839 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18840 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18841 llvm::Function *F =
18842 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18843 return Builder.CreateCall(F, {Addr, Val});
18844 }
18845 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18846 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18847 Intrinsic::ID IID;
18848 switch (BuiltinID) {
18849 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18850 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18851 break;
18852 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18853 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18854 break;
18855 }
18856 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18857 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18858 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18859 return Builder.CreateCall(F, {Addr, Val});
18860 }
18861 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18862 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18863 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18864 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18865 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18866 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18867 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18868 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: {
18869
18870 Intrinsic::ID IID;
18871 switch (BuiltinID) {
18872 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18873 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18874 IID = Intrinsic::amdgcn_global_load_tr_b64;
18875 break;
18876 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18877 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18878 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18879 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18880 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18881 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
18882 IID = Intrinsic::amdgcn_global_load_tr_b128;
18883 break;
18884 }
18885 llvm::Type *LoadTy = ConvertType(E->getType());
18886 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18887 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18888 return Builder.CreateCall(F, {Addr});
18889 }
18890 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18891 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18892 {llvm::Type::getInt64Ty(getLLVMContext())});
18893 return Builder.CreateCall(F);
18894 }
18895 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18896 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18897 {llvm::Type::getInt64Ty(getLLVMContext())});
18898 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18899 return Builder.CreateCall(F, {Env});
18900 }
18901 case AMDGPU::BI__builtin_amdgcn_read_exec:
18902 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18903 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18904 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18905 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18906 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18907 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18908 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18909 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18910 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18911 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18912 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18913 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18914 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18915 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18916 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18917
18918 // The builtins take these arguments as vec4 where the last element is
18919 // ignored. The intrinsic takes them as vec3.
18920 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18921 ArrayRef<int>{0, 1, 2});
18922 RayDir =
18923 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18924 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18925 ArrayRef<int>{0, 1, 2});
18926
18927 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18928 {NodePtr->getType(), RayDir->getType()});
18929 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18930 RayInverseDir, TextureDescr});
18931 }
18932
18933 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18934 SmallVector<Value *, 4> Args;
18935 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18936 Args.push_back(EmitScalarExpr(E->getArg(i)));
18937
18938 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18939 Value *Call = Builder.CreateCall(F, Args);
18940 Value *Rtn = Builder.CreateExtractValue(Call, 0);
18941 Value *A = Builder.CreateExtractValue(Call, 1);
18942 llvm::Type *RetTy = ConvertType(E->getType());
18943 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18944 (uint64_t)0);
18945 return Builder.CreateInsertElement(I0, A, 1);
18946 }
18947
18948 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18949 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18950 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18951 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18952 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18953 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18954 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18955 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18956 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18957 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18958 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18959 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18960 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18961 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18962 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18963 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18964 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18965 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18966 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18967 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18968 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18969 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18970 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18971 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18972 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18973 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18974 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18975 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18976 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18977 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18978 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18979 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18980 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18981 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18982 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18983 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18984 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18985 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18986 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18987 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18988 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18989 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18990 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18991 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18992 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18993 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18994 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18995 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18996 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18997 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18998 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18999 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19000 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19001 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19002 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19003 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19004 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19005 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19006 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19007 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19008
19009 // These operations perform a matrix multiplication and accumulation of
19010 // the form:
19011 // D = A * B + C
19012 // We need to specify one type for the AB matrices and one for the CD
19013 // matrices. Sparse matrix operations can have different types for A and B,
19014 // as well as an additional type for the sparsity index.
19015 // The destination type should be listed before the types of the source operands.
19016 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
19017 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
19018 // There is no need for the variable opsel argument, so always set it to
19019 // "false".
19020 bool AppendFalseForOpselArg = false;
19021 unsigned BuiltinWMMAOp;
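// For example (illustrative): for wmma_f32_16x16x16_f16 the entry {2, 0}
// records that call operand 2 (the C/D accumulator) and operand 0 (the A/B
// inputs) supply the matrix types used to instantiate the overloaded
// intrinsic.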
19022
19023 switch (BuiltinID) {
19024 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19025 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19026 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19027 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19028 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19029 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
19030 break;
19031 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19032 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19033 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19034 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19035 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19036 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
19037 break;
19038 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19039 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19040 AppendFalseForOpselArg = true;
19041 [[fallthrough]];
19042 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19043 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19044 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19045 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
19046 break;
19047 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19048 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19049 AppendFalseForOpselArg = true;
19050 [[fallthrough]];
19051 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19052 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19053 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19054 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
19055 break;
19056 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19057 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19058 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19059 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
19060 break;
19061 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19062 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19063 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19064 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
19065 break;
19066 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19067 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19068 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19069 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19070 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19071 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
19072 break;
19073 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19074 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19075 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19076 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19077 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19078 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
19079 break;
19080 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19081 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19082 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19083 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
19084 break;
19085 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19086 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19087 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19088 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
19089 break;
19090 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19091 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19092 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19093 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
19094 break;
19095 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19096 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19097 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19098 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
19099 break;
19100 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19101 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19102 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19103 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
19104 break;
19105 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19106 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19107 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19108 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
19109 break;
19110 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19111 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19112 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19113 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
19114 break;
19115 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19116 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19117 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19118 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
19119 break;
19120 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19121 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19122 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19123 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
19124 break;
19125 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19126 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19127 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19128 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
19129 break;
19130 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19131 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19132 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19133 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
19134 break;
19135 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19136 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19137 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19138 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
19139 break;
19140 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19141 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19142 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19143 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
19144 break;
19145 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19146 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19147 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19148 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
19149 break;
19150 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19151 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19152 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19153 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
19154 break;
19155 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19156 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
19157 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19158 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
19159 break;
19160 }
19161
19162 SmallVector<Value *, 6> Args;
19163 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19164 Args.push_back(EmitScalarExpr(E->getArg(i)));
19165 if (AppendFalseForOpselArg)
19166 Args.push_back(Builder.getFalse());
19167
19168 SmallVector<llvm::Type *, 6> ArgTypes;
19169 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
19170 ArgTypes.push_back(Args[ArgIdx]->getType());
19171
19172 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
19173 return Builder.CreateCall(F, Args);
19174 }
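// Note on the WMMA/SWMMAC lowering above: the intrinsics are overloaded on
// the matrix operand types, and ArgsForMatchingMatrixTypes records which call
// arguments supply those types (e.g. {2, 0} means argument 2 gives the C/D
// accumulator type and argument 0 the A/B input type). Sketch only, with
// placeholder values and without asserting exact vector widths (they depend
// on the wavefront size):
//   d = __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(a, b, c);
// lowers to a call of llvm.amdgcn.wmma.f32.16x16x16.f16 mangled with the
// accumulator and A/B vector types collected in ArgTypes.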
19175
19176 // amdgcn workitem
19177 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
19178 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
19179 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
19180 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
19181 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
19182 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
19183
19184 // amdgcn workgroup size
19185 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
19186 return EmitAMDGPUWorkGroupSize(*this, 0);
19187 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
19188 return EmitAMDGPUWorkGroupSize(*this, 1);
19189 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
19190 return EmitAMDGPUWorkGroupSize(*this, 2);
19191
19192 // amdgcn grid size
19193 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19194 return EmitAMDGPUGridSize(*this, 0);
19195 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19196 return EmitAMDGPUGridSize(*this, 1);
19197 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19198 return EmitAMDGPUGridSize(*this, 2);
19199
19200 // r600 intrinsics
19201 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19202 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19203 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19204 Intrinsic::r600_recipsqrt_ieee);
19205 case AMDGPU::BI__builtin_r600_read_tidig_x:
19206 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19207 case AMDGPU::BI__builtin_r600_read_tidig_y:
19208 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19209 case AMDGPU::BI__builtin_r600_read_tidig_z:
19210 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19211 case AMDGPU::BI__builtin_amdgcn_alignbit: {
19212 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19213 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19214 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19215 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19216 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19217 }
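// __builtin_amdgcn_alignbit is expressible with the generic funnel-shift-right
// intrinsic: fshr(a, b, c) yields the low 32 bits of (concat(a, b) >> (c % 32)).
// Illustrative values only:
//   __builtin_amdgcn_alignbit(0x11111111u, 0x22222222u, 8) == 0x11222222
// since the low byte of the first operand is shifted in above (b >> 8).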
19218 case AMDGPU::BI__builtin_amdgcn_fence: {
19219 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
19220 EmitScalarExpr(E->getArg(1)), AO, SSID);
19221 FenceInst *Fence = Builder.CreateFence(AO, SSID);
19222 if (E->getNumArgs() > 2)
19223 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
19224 return Fence;
19225 }
19226 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19227 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19228 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19229 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19230 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19231 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19232 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19233 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19234 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19235 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19236 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
19237 llvm::AtomicRMWInst::BinOp BinOp;
19238 switch (BuiltinID) {
19239 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19240 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19241 BinOp = llvm::AtomicRMWInst::UIncWrap;
19242 break;
19243 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19244 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19245 BinOp = llvm::AtomicRMWInst::UDecWrap;
19246 break;
19247 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19248 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19249 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19250 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19251 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19252 BinOp = llvm::AtomicRMWInst::FAdd;
19253 break;
19254 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19255 BinOp = llvm::AtomicRMWInst::FMin;
19256 break;
19257 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19258 BinOp = llvm::AtomicRMWInst::FMax;
19259 break;
19260 }
19261
19262 Address Ptr = CheckAtomicAlignment(*this, E);
19263 Value *Val = EmitScalarExpr(E->getArg(1));
19264 llvm::Type *OrigTy = Val->getType();
19265 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19266
19267 bool Volatile;
19268
19269 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
19270 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
19271 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
19272 // __builtin_amdgcn_ds_faddf/fminf/fmaxf have an explicit volatile argument
19273 Volatile =
19274 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
19275 } else {
19276 // Infer volatile from the passed type.
19277 Volatile =
19278 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19279 }
19280
19281 if (E->getNumArgs() >= 4) {
19282 // Some of the builtins have explicit ordering and scope arguments.
19283 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19284 EmitScalarExpr(E->getArg(3)), AO, SSID);
19285 } else {
19286 // The ds_atomic_fadd_* builtins do not have syncscope/order arguments.
19287 SSID = llvm::SyncScope::System;
19288 AO = AtomicOrdering::SequentiallyConsistent;
19289
19290 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
19291 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19292 llvm::Type *V2BF16Ty = FixedVectorType::get(
19293 llvm::Type::getBFloatTy(Builder.getContext()), 2);
19294 Val = Builder.CreateBitCast(Val, V2BF16Ty);
19295 }
19296 }
19297
19298 llvm::AtomicRMWInst *RMW =
19299 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19300 if (Volatile)
19301 RMW->setVolatile(true);
19302 return Builder.CreateBitCast(RMW, OrigTy);
19303 }
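// All of the cases above lower to a plain LLVM 'atomicrmw' instruction rather
// than a target intrinsic, so generic atomic optimizations still apply. Rough
// sketch for __builtin_amdgcn_ds_atomic_fadd_f32 on an LDS pointer (default
// ordering/scope, since that builtin carries no order/scope arguments):
//   %old = atomicrmw fadd ptr addrspace(3) %p, float %v seq_cst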
19304 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19305 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19306 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19307 llvm::Type *ResultType = ConvertType(E->getType());
19308 // s_sendmsg_rtn is mangled using return type only.
19309 Function *F =
19310 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19311 return Builder.CreateCall(F, {Arg});
19312 }
19313 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
19314 return emitBuiltinWithOneOverloadedType<4>(
19315 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
19316 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
19317 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
19318 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
19319 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
19320 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
19321 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
19322 return emitBuiltinWithOneOverloadedType<5>(
19323 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
19324 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19325 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19326 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19327 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19328 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19329 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
19330 llvm::Type *RetTy = nullptr;
19331 switch (BuiltinID) {
19332 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19333 RetTy = Int8Ty;
19334 break;
19335 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19336 RetTy = Int16Ty;
19337 break;
19338 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19339 RetTy = Int32Ty;
19340 break;
19341 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19342 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
19343 break;
19344 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19345 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
19346 break;
19347 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
19348 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
19349 break;
19350 }
19351 Function *F =
19352 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
19353 return Builder.CreateCall(
19354 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
19355 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
19356 }
19357 default:
19358 return nullptr;
19359 }
19360}
19361
19362/// Handle a SystemZ function in which the final argument is a pointer
19363/// to an int that receives the post-instruction CC value. At the LLVM level
19364/// this is represented as a function that returns a {result, cc} pair.
19365 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19366 unsigned IntrinsicID,
19367 const CallExpr *E) {
19368 unsigned NumArgs = E->getNumArgs() - 1;
19369 SmallVector<Value *, 8> Args(NumArgs);
19370 for (unsigned I = 0; I < NumArgs; ++I)
19371 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19372 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19373 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19374 Value *Call = CGF.Builder.CreateCall(F, Args);
19375 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19376 CGF.Builder.CreateStore(CC, CCPtr);
19377 return CGF.Builder.CreateExtractValue(Call, 0);
19378}
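// Sketch of the IR shape produced here, with illustrative names and types:
// for a CC-returning builtin such as __builtin_s390_vceqbs(a, b, &cc),
//   %pair = call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
//   %cc   = extractvalue { <16 x i8>, i32 } %pair, 1    ; stored through &cc
// and element 0 of %pair is returned as the builtin's value.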
19379
19380 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19381 const CallExpr *E) {
19382 switch (BuiltinID) {
19383 case SystemZ::BI__builtin_tbegin: {
19384 Value *TDB = EmitScalarExpr(E->getArg(0));
19385 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19386 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19387 return Builder.CreateCall(F, {TDB, Control});
19388 }
19389 case SystemZ::BI__builtin_tbegin_nofloat: {
19390 Value *TDB = EmitScalarExpr(E->getArg(0));
19391 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19392 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19393 return Builder.CreateCall(F, {TDB, Control});
19394 }
19395 case SystemZ::BI__builtin_tbeginc: {
19396 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19397 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19398 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19399 return Builder.CreateCall(F, {TDB, Control});
19400 }
19401 case SystemZ::BI__builtin_tabort: {
19402 Value *Data = EmitScalarExpr(E->getArg(0));
19403 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19404 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19405 }
19406 case SystemZ::BI__builtin_non_tx_store: {
19407 Value *Address = EmitScalarExpr(E->getArg(0));
19408 Value *Data = EmitScalarExpr(E->getArg(1));
19409 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19410 return Builder.CreateCall(F, {Data, Address});
19411 }
19412
19413 // Vector builtins. Note that most vector builtins are mapped automatically
19414 // to target-specific LLVM intrinsics. The ones handled specially here can
19415 // be represented via standard LLVM IR, which is preferable to enable common
19416 // LLVM optimizations.
19417
19418 case SystemZ::BI__builtin_s390_vpopctb:
19419 case SystemZ::BI__builtin_s390_vpopcth:
19420 case SystemZ::BI__builtin_s390_vpopctf:
19421 case SystemZ::BI__builtin_s390_vpopctg: {
19422 llvm::Type *ResultType = ConvertType(E->getType());
19423 Value *X = EmitScalarExpr(E->getArg(0));
19424 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19425 return Builder.CreateCall(F, X);
19426 }
19427
19428 case SystemZ::BI__builtin_s390_vclzb:
19429 case SystemZ::BI__builtin_s390_vclzh:
19430 case SystemZ::BI__builtin_s390_vclzf:
19431 case SystemZ::BI__builtin_s390_vclzg: {
19432 llvm::Type *ResultType = ConvertType(E->getType());
19433 Value *X = EmitScalarExpr(E->getArg(0));
19434 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19435 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19436 return Builder.CreateCall(F, {X, Undef});
19437 }
19438
19439 case SystemZ::BI__builtin_s390_vctzb:
19440 case SystemZ::BI__builtin_s390_vctzh:
19441 case SystemZ::BI__builtin_s390_vctzf:
19442 case SystemZ::BI__builtin_s390_vctzg: {
19443 llvm::Type *ResultType = ConvertType(E->getType());
19444 Value *X = EmitScalarExpr(E->getArg(0));
19445 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19446 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19447 return Builder.CreateCall(F, {X, Undef});
19448 }
19449
19450 case SystemZ::BI__builtin_s390_verllb:
19451 case SystemZ::BI__builtin_s390_verllh:
19452 case SystemZ::BI__builtin_s390_verllf:
19453 case SystemZ::BI__builtin_s390_verllg: {
19454 llvm::Type *ResultType = ConvertType(E->getType());
19455 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19456 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19457 // Splat scalar rotate amount to vector type.
19458 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19459 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19460 Amt = Builder.CreateVectorSplat(NumElts, Amt);
19461 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19462 return Builder.CreateCall(F, { Src, Src, Amt });
19463 }
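// A rotate-left by N is the funnel shift fshl(x, x, N); the scalar amount is
// splatted first so every lane rotates by the same count. Sketch for a
// <4 x i32> rotate by 8 (constant syntax abbreviated):
//   call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//                                   <4 x i32> <splat of i32 8>)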
19464
19465 case SystemZ::BI__builtin_s390_verllvb:
19466 case SystemZ::BI__builtin_s390_verllvh:
19467 case SystemZ::BI__builtin_s390_verllvf:
19468 case SystemZ::BI__builtin_s390_verllvg: {
19469 llvm::Type *ResultType = ConvertType(E->getType());
19470 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19471 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19472 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19473 return Builder.CreateCall(F, { Src, Src, Amt });
19474 }
19475
19476 case SystemZ::BI__builtin_s390_vfsqsb:
19477 case SystemZ::BI__builtin_s390_vfsqdb: {
19478 llvm::Type *ResultType = ConvertType(E->getType());
19479 Value *X = EmitScalarExpr(E->getArg(0));
19480 if (Builder.getIsFPConstrained()) {
19481 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19482 return Builder.CreateConstrainedFPCall(F, { X });
19483 } else {
19484 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19485 return Builder.CreateCall(F, X);
19486 }
19487 }
19488 case SystemZ::BI__builtin_s390_vfmasb:
19489 case SystemZ::BI__builtin_s390_vfmadb: {
19490 llvm::Type *ResultType = ConvertType(E->getType());
19491 Value *X = EmitScalarExpr(E->getArg(0));
19492 Value *Y = EmitScalarExpr(E->getArg(1));
19493 Value *Z = EmitScalarExpr(E->getArg(2));
19494 if (Builder.getIsFPConstrained()) {
19495 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19496 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19497 } else {
19498 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19499 return Builder.CreateCall(F, {X, Y, Z});
19500 }
19501 }
19502 case SystemZ::BI__builtin_s390_vfmssb:
19503 case SystemZ::BI__builtin_s390_vfmsdb: {
19504 llvm::Type *ResultType = ConvertType(E->getType());
19505 Value *X = EmitScalarExpr(E->getArg(0));
19506 Value *Y = EmitScalarExpr(E->getArg(1));
19507 Value *Z = EmitScalarExpr(E->getArg(2));
19508 if (Builder.getIsFPConstrained()) {
19509 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19510 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19511 } else {
19512 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19513 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19514 }
19515 }
19516 case SystemZ::BI__builtin_s390_vfnmasb:
19517 case SystemZ::BI__builtin_s390_vfnmadb: {
19518 llvm::Type *ResultType = ConvertType(E->getType());
19519 Value *X = EmitScalarExpr(E->getArg(0));
19520 Value *Y = EmitScalarExpr(E->getArg(1));
19521 Value *Z = EmitScalarExpr(E->getArg(2));
19522 if (Builder.getIsFPConstrained()) {
19523 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19524 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19525 } else {
19526 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19527 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19528 }
19529 }
19530 case SystemZ::BI__builtin_s390_vfnmssb:
19531 case SystemZ::BI__builtin_s390_vfnmsdb: {
19532 llvm::Type *ResultType = ConvertType(E->getType());
19533 Value *X = EmitScalarExpr(E->getArg(0));
19534 Value *Y = EmitScalarExpr(E->getArg(1));
19535 Value *Z = EmitScalarExpr(E->getArg(2));
19536 if (Builder.getIsFPConstrained()) {
19537 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19538 Value *NegZ = Builder.CreateFNeg(Z, "sub");
19539 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19540 } else {
19541 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19542 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19543 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19544 }
19545 }
19546 case SystemZ::BI__builtin_s390_vflpsb:
19547 case SystemZ::BI__builtin_s390_vflpdb: {
19548 llvm::Type *ResultType = ConvertType(E->getType());
19549 Value *X = EmitScalarExpr(E->getArg(0));
19550 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19551 return Builder.CreateCall(F, X);
19552 }
19553 case SystemZ::BI__builtin_s390_vflnsb:
19554 case SystemZ::BI__builtin_s390_vflndb: {
19555 llvm::Type *ResultType = ConvertType(E->getType());
19556 Value *X = EmitScalarExpr(E->getArg(0));
19557 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19558 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19559 }
19560 case SystemZ::BI__builtin_s390_vfisb:
19561 case SystemZ::BI__builtin_s390_vfidb: {
19562 llvm::Type *ResultType = ConvertType(E->getType());
19563 Value *X = EmitScalarExpr(E->getArg(0));
19564 // Constant-fold the M4 and M5 mask arguments.
19565 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19566 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19567 // Check whether this instance can be represented via an LLVM standard
19568 // intrinsic. We only support some combinations of M4 and M5.
19569 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19570 Intrinsic::ID CI;
19571 switch (M4.getZExtValue()) {
19572 default: break;
19573 case 0: // IEEE-inexact exception allowed
19574 switch (M5.getZExtValue()) {
19575 default: break;
19576 case 0: ID = Intrinsic::rint;
19577 CI = Intrinsic::experimental_constrained_rint; break;
19578 }
19579 break;
19580 case 4: // IEEE-inexact exception suppressed
19581 switch (M5.getZExtValue()) {
19582 default: break;
19583 case 0: ID = Intrinsic::nearbyint;
19584 CI = Intrinsic::experimental_constrained_nearbyint; break;
19585 case 1: ID = Intrinsic::round;
19586 CI = Intrinsic::experimental_constrained_round; break;
19587 case 5: ID = Intrinsic::trunc;
19588 CI = Intrinsic::experimental_constrained_trunc; break;
19589 case 6: ID = Intrinsic::ceil;
19590 CI = Intrinsic::experimental_constrained_ceil; break;
19591 case 7: ID = Intrinsic::floor;
19592 CI = Intrinsic::experimental_constrained_floor; break;
19593 }
19594 break;
19595 }
19596 if (ID != Intrinsic::not_intrinsic) {
19597 if (Builder.getIsFPConstrained()) {
19598 Function *F = CGM.getIntrinsic(CI, ResultType);
19599 return Builder.CreateConstrainedFPCall(F, X);
19600 } else {
19601 Function *F = CGM.getIntrinsic(ID, ResultType);
19602 return Builder.CreateCall(F, X);
19603 }
19604 }
19605 switch (BuiltinID) { // FIXME: constrained version?
19606 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19607 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19608 default: llvm_unreachable("Unknown BuiltinID");
19609 }
19610 Function *F = CGM.getIntrinsic(ID);
19611 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19612 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19613 return Builder.CreateCall(F, {X, M4Value, M5Value});
19614 }
19615 case SystemZ::BI__builtin_s390_vfmaxsb:
19616 case SystemZ::BI__builtin_s390_vfmaxdb: {
19617 llvm::Type *ResultType = ConvertType(E->getType());
19618 Value *X = EmitScalarExpr(E->getArg(0));
19619 Value *Y = EmitScalarExpr(E->getArg(1));
19620 // Constant-fold the M4 mask argument.
19621 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19622 // Check whether this instance can be represented via an LLVM standard
19623 // intrinsic. We only support some values of M4.
19624 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19625 Intrinsic::ID CI;
19626 switch (M4.getZExtValue()) {
19627 default: break;
19628 case 4: ID = Intrinsic::maxnum;
19629 CI = Intrinsic::experimental_constrained_maxnum; break;
19630 }
19631 if (ID != Intrinsic::not_intrinsic) {
19632 if (Builder.getIsFPConstrained()) {
19633 Function *F = CGM.getIntrinsic(CI, ResultType);
19634 return Builder.CreateConstrainedFPCall(F, {X, Y});
19635 } else {
19636 Function *F = CGM.getIntrinsic(ID, ResultType);
19637 return Builder.CreateCall(F, {X, Y});
19638 }
19639 }
19640 switch (BuiltinID) {
19641 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19642 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19643 default: llvm_unreachable("Unknown BuiltinID");
19644 }
19645 Function *F = CGM.getIntrinsic(ID);
19646 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19647 return Builder.CreateCall(F, {X, Y, M4Value});
19648 }
19649 case SystemZ::BI__builtin_s390_vfminsb:
19650 case SystemZ::BI__builtin_s390_vfmindb: {
19651 llvm::Type *ResultType = ConvertType(E->getType());
19652 Value *X = EmitScalarExpr(E->getArg(0));
19653 Value *Y = EmitScalarExpr(E->getArg(1));
19654 // Constant-fold the M4 mask argument.
19655 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19656 // Check whether this instance can be represented via an LLVM standard
19657 // intrinsic. We only support some values of M4.
19658 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19659 Intrinsic::ID CI;
19660 switch (M4.getZExtValue()) {
19661 default: break;
19662 case 4: ID = Intrinsic::minnum;
19663 CI = Intrinsic::experimental_constrained_minnum; break;
19664 }
19665 if (ID != Intrinsic::not_intrinsic) {
19666 if (Builder.getIsFPConstrained()) {
19667 Function *F = CGM.getIntrinsic(CI, ResultType);
19668 return Builder.CreateConstrainedFPCall(F, {X, Y});
19669 } else {
19670 Function *F = CGM.getIntrinsic(ID, ResultType);
19671 return Builder.CreateCall(F, {X, Y});
19672 }
19673 }
19674 switch (BuiltinID) {
19675 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19676 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19677 default: llvm_unreachable("Unknown BuiltinID");
19678 }
19679 Function *F = CGM.getIntrinsic(ID);
19680 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19681 return Builder.CreateCall(F, {X, Y, M4Value});
19682 }
19683
19684 case SystemZ::BI__builtin_s390_vlbrh:
19685 case SystemZ::BI__builtin_s390_vlbrf:
19686 case SystemZ::BI__builtin_s390_vlbrg: {
19687 llvm::Type *ResultType = ConvertType(E->getType());
19688 Value *X = EmitScalarExpr(E->getArg(0));
19689 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19690 return Builder.CreateCall(F, X);
19691 }
19692
19693 // Vector intrinsics that output the post-instruction CC value.
19694
19695#define INTRINSIC_WITH_CC(NAME) \
19696 case SystemZ::BI__builtin_##NAME: \
19697 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
19698
19699 INTRINSIC_WITH_CC(s390_vpkshs);
19700 INTRINSIC_WITH_CC(s390_vpksfs);
19701 INTRINSIC_WITH_CC(s390_vpksgs);
19702
19703 INTRINSIC_WITH_CC(s390_vpklshs);
19704 INTRINSIC_WITH_CC(s390_vpklsfs);
19705 INTRINSIC_WITH_CC(s390_vpklsgs);
19706
19707 INTRINSIC_WITH_CC(s390_vceqbs);
19708 INTRINSIC_WITH_CC(s390_vceqhs);
19709 INTRINSIC_WITH_CC(s390_vceqfs);
19710 INTRINSIC_WITH_CC(s390_vceqgs);
19711
19712 INTRINSIC_WITH_CC(s390_vchbs);
19713 INTRINSIC_WITH_CC(s390_vchhs);
19714 INTRINSIC_WITH_CC(s390_vchfs);
19715 INTRINSIC_WITH_CC(s390_vchgs);
19716
19717 INTRINSIC_WITH_CC(s390_vchlbs);
19718 INTRINSIC_WITH_CC(s390_vchlhs);
19719 INTRINSIC_WITH_CC(s390_vchlfs);
19720 INTRINSIC_WITH_CC(s390_vchlgs);
19721
19722 INTRINSIC_WITH_CC(s390_vfaebs);
19723 INTRINSIC_WITH_CC(s390_vfaehs);
19724 INTRINSIC_WITH_CC(s390_vfaefs);
19725
19726 INTRINSIC_WITH_CC(s390_vfaezbs);
19727 INTRINSIC_WITH_CC(s390_vfaezhs);
19728 INTRINSIC_WITH_CC(s390_vfaezfs);
19729
19730 INTRINSIC_WITH_CC(s390_vfeebs);
19731 INTRINSIC_WITH_CC(s390_vfeehs);
19732 INTRINSIC_WITH_CC(s390_vfeefs);
19733
19734 INTRINSIC_WITH_CC(s390_vfeezbs);
19735 INTRINSIC_WITH_CC(s390_vfeezhs);
19736 INTRINSIC_WITH_CC(s390_vfeezfs);
19737
19738 INTRINSIC_WITH_CC(s390_vfenebs);
19739 INTRINSIC_WITH_CC(s390_vfenehs);
19740 INTRINSIC_WITH_CC(s390_vfenefs);
19741
19742 INTRINSIC_WITH_CC(s390_vfenezbs);
19743 INTRINSIC_WITH_CC(s390_vfenezhs);
19744 INTRINSIC_WITH_CC(s390_vfenezfs);
19745
19746 INTRINSIC_WITH_CC(s390_vistrbs);
19747 INTRINSIC_WITH_CC(s390_vistrhs);
19748 INTRINSIC_WITH_CC(s390_vistrfs);
19749
19750 INTRINSIC_WITH_CC(s390_vstrcbs);
19751 INTRINSIC_WITH_CC(s390_vstrchs);
19752 INTRINSIC_WITH_CC(s390_vstrcfs);
19753
19754 INTRINSIC_WITH_CC(s390_vstrczbs);
19755 INTRINSIC_WITH_CC(s390_vstrczhs);
19756 INTRINSIC_WITH_CC(s390_vstrczfs);
19757
19758 INTRINSIC_WITH_CC(s390_vfcesbs);
19759 INTRINSIC_WITH_CC(s390_vfcedbs);
19760 INTRINSIC_WITH_CC(s390_vfchsbs);
19761 INTRINSIC_WITH_CC(s390_vfchdbs);
19762 INTRINSIC_WITH_CC(s390_vfchesbs);
19763 INTRINSIC_WITH_CC(s390_vfchedbs);
19764
19765 INTRINSIC_WITH_CC(s390_vftcisb);
19766 INTRINSIC_WITH_CC(s390_vftcidb);
19767
19768 INTRINSIC_WITH_CC(s390_vstrsb);
19769 INTRINSIC_WITH_CC(s390_vstrsh);
19770 INTRINSIC_WITH_CC(s390_vstrsf);
19771
19772 INTRINSIC_WITH_CC(s390_vstrszb);
19773 INTRINSIC_WITH_CC(s390_vstrszh);
19774 INTRINSIC_WITH_CC(s390_vstrszf);
19775
19776#undef INTRINSIC_WITH_CC
19777
19778 default:
19779 return nullptr;
19780 }
19781}
19782
19783namespace {
19784// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
19785struct NVPTXMmaLdstInfo {
19786 unsigned NumResults; // Number of elements to load/store
19787 // Intrinsic IDs for row/col variants. 0 if a particular layout is unsupported.
19788 unsigned IID_col;
19789 unsigned IID_row;
19790};
19791
19792#define MMA_INTR(geom_op_type, layout) \
19793 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19794#define MMA_LDST(n, geom_op_type) \
19795 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
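// For example, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. an 8-element fragment plus its column- and row-major load intrinsics.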
19796
19797static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19798 switch (BuiltinID) {
19799 // FP MMA loads
19800 case NVPTX::BI__hmma_m16n16k16_ld_a:
19801 return MMA_LDST(8, m16n16k16_load_a_f16);
19802 case NVPTX::BI__hmma_m16n16k16_ld_b:
19803 return MMA_LDST(8, m16n16k16_load_b_f16);
19804 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19805 return MMA_LDST(4, m16n16k16_load_c_f16);
19806 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19807 return MMA_LDST(8, m16n16k16_load_c_f32);
19808 case NVPTX::BI__hmma_m32n8k16_ld_a:
19809 return MMA_LDST(8, m32n8k16_load_a_f16);
19810 case NVPTX::BI__hmma_m32n8k16_ld_b:
19811 return MMA_LDST(8, m32n8k16_load_b_f16);
19812 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19813 return MMA_LDST(4, m32n8k16_load_c_f16);
19814 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19815 return MMA_LDST(8, m32n8k16_load_c_f32);
19816 case NVPTX::BI__hmma_m8n32k16_ld_a:
19817 return MMA_LDST(8, m8n32k16_load_a_f16);
19818 case NVPTX::BI__hmma_m8n32k16_ld_b:
19819 return MMA_LDST(8, m8n32k16_load_b_f16);
19820 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19821 return MMA_LDST(4, m8n32k16_load_c_f16);
19822 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19823 return MMA_LDST(8, m8n32k16_load_c_f32);
19824
19825 // Integer MMA loads
19826 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19827 return MMA_LDST(2, m16n16k16_load_a_s8);
19828 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19829 return MMA_LDST(2, m16n16k16_load_a_u8);
19830 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19831 return MMA_LDST(2, m16n16k16_load_b_s8);
19832 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19833 return MMA_LDST(2, m16n16k16_load_b_u8);
19834 case NVPTX::BI__imma_m16n16k16_ld_c:
19835 return MMA_LDST(8, m16n16k16_load_c_s32);
19836 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19837 return MMA_LDST(4, m32n8k16_load_a_s8);
19838 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19839 return MMA_LDST(4, m32n8k16_load_a_u8);
19840 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19841 return MMA_LDST(1, m32n8k16_load_b_s8);
19842 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19843 return MMA_LDST(1, m32n8k16_load_b_u8);
19844 case NVPTX::BI__imma_m32n8k16_ld_c:
19845 return MMA_LDST(8, m32n8k16_load_c_s32);
19846 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19847 return MMA_LDST(1, m8n32k16_load_a_s8);
19848 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19849 return MMA_LDST(1, m8n32k16_load_a_u8);
19850 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19851 return MMA_LDST(4, m8n32k16_load_b_s8);
19852 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19853 return MMA_LDST(4, m8n32k16_load_b_u8);
19854 case NVPTX::BI__imma_m8n32k16_ld_c:
19855 return MMA_LDST(8, m8n32k16_load_c_s32);
19856
19857 // Sub-integer MMA loads.
19858 // Only row/col layout is supported by A/B fragments.
19859 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19860 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19861 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19862 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19863 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19864 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19865 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19866 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19867 case NVPTX::BI__imma_m8n8k32_ld_c:
19868 return MMA_LDST(2, m8n8k32_load_c_s32);
19869 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19870 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19871 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19872 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19873 case NVPTX::BI__bmma_m8n8k128_ld_c:
19874 return MMA_LDST(2, m8n8k128_load_c_s32);
19875
19876 // Double MMA loads
19877 case NVPTX::BI__dmma_m8n8k4_ld_a:
19878 return MMA_LDST(1, m8n8k4_load_a_f64);
19879 case NVPTX::BI__dmma_m8n8k4_ld_b:
19880 return MMA_LDST(1, m8n8k4_load_b_f64);
19881 case NVPTX::BI__dmma_m8n8k4_ld_c:
19882 return MMA_LDST(2, m8n8k4_load_c_f64);
19883
19884 // Alternate float MMA loads
19885 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19886 return MMA_LDST(4, m16n16k16_load_a_bf16);
19887 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19888 return MMA_LDST(4, m16n16k16_load_b_bf16);
19889 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19890 return MMA_LDST(2, m8n32k16_load_a_bf16);
19891 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19892 return MMA_LDST(8, m8n32k16_load_b_bf16);
19893 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19894 return MMA_LDST(8, m32n8k16_load_a_bf16);
19895 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19896 return MMA_LDST(2, m32n8k16_load_b_bf16);
19897 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19898 return MMA_LDST(4, m16n16k8_load_a_tf32);
19899 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19900 return MMA_LDST(4, m16n16k8_load_b_tf32);
19901 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19902 return MMA_LDST(8, m16n16k8_load_c_f32);
19903
19904 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19905 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
19906 // use fragment C for both loads and stores.
19907 // FP MMA stores.
19908 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19909 return MMA_LDST(4, m16n16k16_store_d_f16);
19910 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19911 return MMA_LDST(8, m16n16k16_store_d_f32);
19912 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19913 return MMA_LDST(4, m32n8k16_store_d_f16);
19914 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19915 return MMA_LDST(8, m32n8k16_store_d_f32);
19916 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19917 return MMA_LDST(4, m8n32k16_store_d_f16);
19918 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19919 return MMA_LDST(8, m8n32k16_store_d_f32);
19920
19921 // Integer and sub-integer MMA stores.
19922 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
19923 // name, integer loads/stores use LLVM's i32.
19924 case NVPTX::BI__imma_m16n16k16_st_c_i32:
19925 return MMA_LDST(8, m16n16k16_store_d_s32);
19926 case NVPTX::BI__imma_m32n8k16_st_c_i32:
19927 return MMA_LDST(8, m32n8k16_store_d_s32);
19928 case NVPTX::BI__imma_m8n32k16_st_c_i32:
19929 return MMA_LDST(8, m8n32k16_store_d_s32);
19930 case NVPTX::BI__imma_m8n8k32_st_c_i32:
19931 return MMA_LDST(2, m8n8k32_store_d_s32);
19932 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19933 return MMA_LDST(2, m8n8k128_store_d_s32);
19934
19935 // Double MMA store
19936 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19937 return MMA_LDST(2, m8n8k4_store_d_f64);
19938
19939 // Alternate float MMA store
19940 case NVPTX::BI__mma_m16n16k8_st_c_f32:
19941 return MMA_LDST(8, m16n16k8_store_d_f32);
19942
19943 default:
19944 llvm_unreachable("Unknown MMA builtin");
19945 }
19946}
19947#undef MMA_LDST
19948#undef MMA_INTR
19949
19950
19951struct NVPTXMmaInfo {
19952 unsigned NumEltsA;
19953 unsigned NumEltsB;
19954 unsigned NumEltsC;
19955 unsigned NumEltsD;
19956
19957 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19958 // over 'col' for layout. The index of non-satf variants is expected to match
19959 // the undocumented layout constants used by CUDA's mma.hpp.
19960 std::array<unsigned, 8> Variants;
19961
19962 unsigned getMMAIntrinsic(int Layout, bool Satf) {
19963 unsigned Index = Layout + 4 * Satf;
19964 if (Index >= Variants.size())
19965 return 0;
19966 return Variants[Index];
19967 }
19968};
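// Index layout used by getMMAIntrinsic(): Variants[0..3] hold the row_row,
// row_col, col_row and col_col intrinsics and Variants[4..7] their .satfinite
// counterparts (see MMA_SATF_VARIANTS below). For example,
// getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true) computes Index = 1 + 4 * 1 = 5
// and returns the row_col satfinite variant, or 0 if that slot is unsupported.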
19969
19970 // Returns an intrinsic that matches Layout and Satf for valid combinations of
19971 // Layout and Satf, 0 otherwise.
19972static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19973 // clang-format off
19974#define MMA_VARIANTS(geom, type) \
19975 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19976 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19977 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19978 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19979#define MMA_SATF_VARIANTS(geom, type) \
19980 MMA_VARIANTS(geom, type), \
19981 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19982 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19983 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19984 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19985// Sub-integer MMA only supports row.col layout.
19986#define MMA_VARIANTS_I4(geom, type) \
19987 0, \
19988 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19989 0, \
19990 0, \
19991 0, \
19992 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19993 0, \
19994 0
19995// b1 MMA does not support .satfinite.
19996#define MMA_VARIANTS_B1_XOR(geom, type) \
19997 0, \
19998 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19999 0, \
20000 0, \
20001 0, \
20002 0, \
20003 0, \
20004 0
20005#define MMA_VARIANTS_B1_AND(geom, type) \
20006 0, \
20007 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
20008 0, \
20009 0, \
20010 0, \
20011 0, \
20012 0, \
20013 0
20014 // clang-format on
20015 switch (BuiltinID) {
20016 // FP MMA
20017 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
20018 // the NumEltsN fields of the return value are ordered as A,B,C,D.
20019 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20020 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
20021 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20022 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
20023 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20024 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
20025 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20026 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
20027 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20028 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
20029 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20030 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
20031 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20032 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
20033 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20034 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
20035 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20036 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
20037 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20038 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
20039 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20040 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
20041 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20042 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
20043
20044 // Integer MMA
20045 case NVPTX::BI__imma_m16n16k16_mma_s8:
20046 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
20047 case NVPTX::BI__imma_m16n16k16_mma_u8:
20048 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
20049 case NVPTX::BI__imma_m32n8k16_mma_s8:
20050 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
20051 case NVPTX::BI__imma_m32n8k16_mma_u8:
20052 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
20053 case NVPTX::BI__imma_m8n32k16_mma_s8:
20054 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
20055 case NVPTX::BI__imma_m8n32k16_mma_u8:
20056 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
20057
20058 // Sub-integer MMA
20059 case NVPTX::BI__imma_m8n8k32_mma_s4:
20060 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
20061 case NVPTX::BI__imma_m8n8k32_mma_u4:
20062 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
20063 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20064 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
20065 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20066 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
20067
20068 // Double MMA
20069 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20070 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
20071
20072 // Alternate FP MMA
20073 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20074 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
20075 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20076 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
20077 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20078 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
20079 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
20080 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
20081 default:
20082 llvm_unreachable("Unexpected builtin ID.");
20083 }
20084#undef MMA_VARIANTS
20085#undef MMA_SATF_VARIANTS
20086#undef MMA_VARIANTS_I4
20087#undef MMA_VARIANTS_B1_AND
20088#undef MMA_VARIANTS_B1_XOR
20089}
20090
20091static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
20092 const CallExpr *E) {
20093 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20094 QualType ArgType = E->getArg(0)->getType();
20095 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
20096 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
20097 return CGF.Builder.CreateCall(
20098 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20099 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
20100}
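// Rough shape of the emitted call, e.g. for __nvvm_ldg_f4 on a pointer to a
// 4 x float vector (exact name mangling and address space depend on the
// overload types, so this is a sketch only):
//   call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr %p, i32 16)
// where the trailing i32 is the natural alignment of the pointee type.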
20101
20102static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
20103 const CallExpr *E) {
20104 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20105 llvm::Type *ElemTy =
20106 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20107 return CGF.Builder.CreateCall(
20108 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20109 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
20110}
20111
20112static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
20113 CodeGenFunction &CGF, const CallExpr *E,
20114 int SrcSize) {
20115 return E->getNumArgs() == 3
20116 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
20117 {CGF.EmitScalarExpr(E->getArg(0)),
20118 CGF.EmitScalarExpr(E->getArg(1)),
20119 CGF.EmitScalarExpr(E->getArg(2))})
20120 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
20121 {CGF.EmitScalarExpr(E->getArg(0)),
20122 CGF.EmitScalarExpr(E->getArg(1))});
20123}
20124
20125static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
20126 const CallExpr *E, CodeGenFunction &CGF) {
20127 auto &C = CGF.CGM.getContext();
20128 if (!(C.getLangOpts().NativeHalfType ||
20129 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
20130 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
20131 " requires native half type support.");
20132 return nullptr;
20133 }
20134
20135 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
20136 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20137 return MakeLdgLdu(IntrinsicID, CGF, E);
20138
20139 SmallVector<Value *, 16> Args;
20140 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
20141 auto *FTy = F->getFunctionType();
20142 unsigned ICEArguments = 0;
20143 ASTContext::GetBuiltinTypeError Error;
20144 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
20145 assert(Error == ASTContext::GE_None && "Should not codegen an error");
20146 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
20147 assert((ICEArguments & (1 << i)) == 0);
20148 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
20149 auto *PTy = FTy->getParamType(i);
20150 if (PTy != ArgValue->getType())
20151 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
20152 Args.push_back(ArgValue);
20153 }
20154
20155 return CGF.Builder.CreateCall(F, Args);
20156}
20157} // namespace
20158
20159 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
20160 const CallExpr *E) {
20161 switch (BuiltinID) {
20162 case NVPTX::BI__nvvm_atom_add_gen_i:
20163 case NVPTX::BI__nvvm_atom_add_gen_l:
20164 case NVPTX::BI__nvvm_atom_add_gen_ll:
20165 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
20166
20167 case NVPTX::BI__nvvm_atom_sub_gen_i:
20168 case NVPTX::BI__nvvm_atom_sub_gen_l:
20169 case NVPTX::BI__nvvm_atom_sub_gen_ll:
20170 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
20171
20172 case NVPTX::BI__nvvm_atom_and_gen_i:
20173 case NVPTX::BI__nvvm_atom_and_gen_l:
20174 case NVPTX::BI__nvvm_atom_and_gen_ll:
20175 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
20176
20177 case NVPTX::BI__nvvm_atom_or_gen_i:
20178 case NVPTX::BI__nvvm_atom_or_gen_l:
20179 case NVPTX::BI__nvvm_atom_or_gen_ll:
20180 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
20181
20182 case NVPTX::BI__nvvm_atom_xor_gen_i:
20183 case NVPTX::BI__nvvm_atom_xor_gen_l:
20184 case NVPTX::BI__nvvm_atom_xor_gen_ll:
20185 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
20186
20187 case NVPTX::BI__nvvm_atom_xchg_gen_i:
20188 case NVPTX::BI__nvvm_atom_xchg_gen_l:
20189 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
20190 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
20191
20192 case NVPTX::BI__nvvm_atom_max_gen_i:
20193 case NVPTX::BI__nvvm_atom_max_gen_l:
20194 case NVPTX::BI__nvvm_atom_max_gen_ll:
20195 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
20196
20197 case NVPTX::BI__nvvm_atom_max_gen_ui:
20198 case NVPTX::BI__nvvm_atom_max_gen_ul:
20199 case NVPTX::BI__nvvm_atom_max_gen_ull:
20200 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
20201
20202 case NVPTX::BI__nvvm_atom_min_gen_i:
20203 case NVPTX::BI__nvvm_atom_min_gen_l:
20204 case NVPTX::BI__nvvm_atom_min_gen_ll:
20205 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
20206
20207 case NVPTX::BI__nvvm_atom_min_gen_ui:
20208 case NVPTX::BI__nvvm_atom_min_gen_ul:
20209 case NVPTX::BI__nvvm_atom_min_gen_ull:
20210 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
20211
20212 case NVPTX::BI__nvvm_atom_cas_gen_i:
20213 case NVPTX::BI__nvvm_atom_cas_gen_l:
20214 case NVPTX::BI__nvvm_atom_cas_gen_ll:
20215 // __nvvm_atom_cas_gen_* should return the old value rather than the
20216 // success flag.
20217 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
20218
20219 case NVPTX::BI__nvvm_atom_add_gen_f:
20220 case NVPTX::BI__nvvm_atom_add_gen_d: {
20221 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
20222 Value *Val = EmitScalarExpr(E->getArg(1));
20223
20224 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
20225 AtomicOrdering::SequentiallyConsistent);
20226 }
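// Floating-point atomic add has a first-class LLVM representation, so no NVVM
// intrinsic is needed: __nvvm_atom_add_gen_f(p, x) becomes roughly
//   %old = atomicrmw fadd ptr %p, float %x seq_cst
// and the double variant differs only in the value type.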
20227
20228 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
20229 Value *Ptr = EmitScalarExpr(E->getArg(0));
20230 Value *Val = EmitScalarExpr(E->getArg(1));
20231 Function *FnALI32 =
20232 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
20233 return Builder.CreateCall(FnALI32, {Ptr, Val});
20234 }
20235
20236 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
20237 Value *Ptr = EmitScalarExpr(E->getArg(0));
20238 Value *Val = EmitScalarExpr(E->getArg(1));
20239 Function *FnALD32 =
20240 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
20241 return Builder.CreateCall(FnALD32, {Ptr, Val});
20242 }
20243
20244 case NVPTX::BI__nvvm_ldg_c:
20245 case NVPTX::BI__nvvm_ldg_sc:
20246 case NVPTX::BI__nvvm_ldg_c2:
20247 case NVPTX::BI__nvvm_ldg_sc2:
20248 case NVPTX::BI__nvvm_ldg_c4:
20249 case NVPTX::BI__nvvm_ldg_sc4:
20250 case NVPTX::BI__nvvm_ldg_s:
20251 case NVPTX::BI__nvvm_ldg_s2:
20252 case NVPTX::BI__nvvm_ldg_s4:
20253 case NVPTX::BI__nvvm_ldg_i:
20254 case NVPTX::BI__nvvm_ldg_i2:
20255 case NVPTX::BI__nvvm_ldg_i4:
20256 case NVPTX::BI__nvvm_ldg_l:
20257 case NVPTX::BI__nvvm_ldg_l2:
20258 case NVPTX::BI__nvvm_ldg_ll:
20259 case NVPTX::BI__nvvm_ldg_ll2:
20260 case NVPTX::BI__nvvm_ldg_uc:
20261 case NVPTX::BI__nvvm_ldg_uc2:
20262 case NVPTX::BI__nvvm_ldg_uc4:
20263 case NVPTX::BI__nvvm_ldg_us:
20264 case NVPTX::BI__nvvm_ldg_us2:
20265 case NVPTX::BI__nvvm_ldg_us4:
20266 case NVPTX::BI__nvvm_ldg_ui:
20267 case NVPTX::BI__nvvm_ldg_ui2:
20268 case NVPTX::BI__nvvm_ldg_ui4:
20269 case NVPTX::BI__nvvm_ldg_ul:
20270 case NVPTX::BI__nvvm_ldg_ul2:
20271 case NVPTX::BI__nvvm_ldg_ull:
20272 case NVPTX::BI__nvvm_ldg_ull2:
20273 // PTX Interoperability section 2.2: "For a vector with an even number of
20274 // elements, its alignment is set to number of elements times the alignment
20275 // of its member: n*alignof(t)."
20276 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
20277 case NVPTX::BI__nvvm_ldg_f:
20278 case NVPTX::BI__nvvm_ldg_f2:
20279 case NVPTX::BI__nvvm_ldg_f4:
20280 case NVPTX::BI__nvvm_ldg_d:
20281 case NVPTX::BI__nvvm_ldg_d2:
20282 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20283
20284 case NVPTX::BI__nvvm_ldu_c:
20285 case NVPTX::BI__nvvm_ldu_sc:
20286 case NVPTX::BI__nvvm_ldu_c2:
20287 case NVPTX::BI__nvvm_ldu_sc2:
20288 case NVPTX::BI__nvvm_ldu_c4:
20289 case NVPTX::BI__nvvm_ldu_sc4:
20290 case NVPTX::BI__nvvm_ldu_s:
20291 case NVPTX::BI__nvvm_ldu_s2:
20292 case NVPTX::BI__nvvm_ldu_s4:
20293 case NVPTX::BI__nvvm_ldu_i:
20294 case NVPTX::BI__nvvm_ldu_i2:
20295 case NVPTX::BI__nvvm_ldu_i4:
20296 case NVPTX::BI__nvvm_ldu_l:
20297 case NVPTX::BI__nvvm_ldu_l2:
20298 case NVPTX::BI__nvvm_ldu_ll:
20299 case NVPTX::BI__nvvm_ldu_ll2:
20300 case NVPTX::BI__nvvm_ldu_uc:
20301 case NVPTX::BI__nvvm_ldu_uc2:
20302 case NVPTX::BI__nvvm_ldu_uc4:
20303 case NVPTX::BI__nvvm_ldu_us:
20304 case NVPTX::BI__nvvm_ldu_us2:
20305 case NVPTX::BI__nvvm_ldu_us4:
20306 case NVPTX::BI__nvvm_ldu_ui:
20307 case NVPTX::BI__nvvm_ldu_ui2:
20308 case NVPTX::BI__nvvm_ldu_ui4:
20309 case NVPTX::BI__nvvm_ldu_ul:
20310 case NVPTX::BI__nvvm_ldu_ul2:
20311 case NVPTX::BI__nvvm_ldu_ull:
20312 case NVPTX::BI__nvvm_ldu_ull2:
20313 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20314 case NVPTX::BI__nvvm_ldu_f:
20315 case NVPTX::BI__nvvm_ldu_f2:
20316 case NVPTX::BI__nvvm_ldu_f4:
20317 case NVPTX::BI__nvvm_ldu_d:
20318 case NVPTX::BI__nvvm_ldu_d2:
20319 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20320
20321 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20322 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20323 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20324 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20325 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20326 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20327 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20328 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20329 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20330 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20331 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20332 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20333 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20334 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20335 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20336 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20337 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20338 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20339 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20340 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20341 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20342 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20343 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20344 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20345 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20346 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20347 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20348 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20349 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20350 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20351 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20352 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20353 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20354 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20355 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20356 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20357 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20358 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20359 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20360 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20361 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20362 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20363 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20364 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20365 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20366 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20367 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20368 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20369 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20370 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20371 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20372 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20373 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20374 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20375 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20376 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20377 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20378 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20379 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20380 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20381 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20382 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20383 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20384 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20385 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20386 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20387 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20388 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20389 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20390 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20391 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20392 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20393 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20394 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20395 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20396 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20397 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20398 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20399 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20400 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20401 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20402 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20403 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20404 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20405 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20406 Value *Ptr = EmitScalarExpr(E->getArg(0));
20407 llvm::Type *ElemTy =
20408 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20409 return Builder.CreateCall(
20410 CGM.getIntrinsic(
20411 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20412 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20413 }
20414 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20415 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20416 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20417 Value *Ptr = EmitScalarExpr(E->getArg(0));
20418 llvm::Type *ElemTy =
20419 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20420 return Builder.CreateCall(
20421 CGM.getIntrinsic(
20422 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20423 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20424 }
20425 case NVPTX::BI__nvvm_match_all_sync_i32p:
20426 case NVPTX::BI__nvvm_match_all_sync_i64p: {
20427 Value *Mask = EmitScalarExpr(E->getArg(0));
20428 Value *Val = EmitScalarExpr(E->getArg(1));
20429 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
20430 Value *ResultPair = Builder.CreateCall(
20431 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20432 ? Intrinsic::nvvm_match_all_sync_i32p
20433 : Intrinsic::nvvm_match_all_sync_i64p),
20434 {Mask, Val});
20435 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
20436 PredOutPtr.getElementType());
20437 Builder.CreateStore(Pred, PredOutPtr);
20438 return Builder.CreateExtractValue(ResultPair, 0);
20439 }
20440
20441 // FP MMA loads
20442 case NVPTX::BI__hmma_m16n16k16_ld_a:
20443 case NVPTX::BI__hmma_m16n16k16_ld_b:
20444 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20445 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20446 case NVPTX::BI__hmma_m32n8k16_ld_a:
20447 case NVPTX::BI__hmma_m32n8k16_ld_b:
20448 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20449 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20450 case NVPTX::BI__hmma_m8n32k16_ld_a:
20451 case NVPTX::BI__hmma_m8n32k16_ld_b:
20452 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20453 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20454 // Integer MMA loads.
20455 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20456 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20457 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20458 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20459 case NVPTX::BI__imma_m16n16k16_ld_c:
20460 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20461 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20462 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20463 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20464 case NVPTX::BI__imma_m32n8k16_ld_c:
20465 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20466 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20467 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20468 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20469 case NVPTX::BI__imma_m8n32k16_ld_c:
20470 // Sub-integer MMA loads.
20471 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20472 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20473 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20474 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20475 case NVPTX::BI__imma_m8n8k32_ld_c:
20476 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20477 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20478 case NVPTX::BI__bmma_m8n8k128_ld_c:
20479 // Double MMA loads.
20480 case NVPTX::BI__dmma_m8n8k4_ld_a:
20481 case NVPTX::BI__dmma_m8n8k4_ld_b:
20482 case NVPTX::BI__dmma_m8n8k4_ld_c:
20483 // Alternate float MMA loads.
20484 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20485 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20486 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20487 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20488 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20489 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20490 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20491 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20492 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20493 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20494 Value *Src = EmitScalarExpr(E->getArg(1));
20495 Value *Ldm = EmitScalarExpr(E->getArg(2));
20496 std::optional<llvm::APSInt> isColMajorArg =
20497 E->getArg(3)->getIntegerConstantExpr(getContext());
20498 if (!isColMajorArg)
20499 return nullptr;
20500 bool isColMajor = isColMajorArg->getSExtValue();
20501 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20502 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20503 if (IID == 0)
20504 return nullptr;
20505
20506 Value *Result =
20507 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20508
20509 // Save returned values.
20510 assert(II.NumResults);
20511 if (II.NumResults == 1) {
20512 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
20513 CharUnits::fromQuantity(4));
20514 } else {
20515 for (unsigned i = 0; i < II.NumResults; ++i) {
20516 Builder.CreateAlignedStore(
20517 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20518 Dst.getElementType()),
20519 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20520 llvm::ConstantInt::get(IntTy, i)),
20521 CharUnits::fromQuantity(4));
20522 }
20523 }
20524 return Result;
20525 }
20526
20527 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20528 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20529 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20530 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20531 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20532 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20533 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20534 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20535 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20536 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20537 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20538 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20539 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20540 Value *Dst = EmitScalarExpr(E->getArg(0));
20541 Address Src = EmitPointerWithAlignment(E->getArg(1));
20542 Value *Ldm = EmitScalarExpr(E->getArg(2));
20543 std::optional<llvm::APSInt> isColMajorArg =
20544 E->getArg(3)->getIntegerConstantExpr(getContext());
20545 if (!isColMajorArg)
20546 return nullptr;
20547 bool isColMajor = isColMajorArg->getSExtValue();
20548 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20549 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20550 if (IID == 0)
20551 return nullptr;
20552 Function *Intrinsic =
20553 CGM.getIntrinsic(IID, Dst->getType());
20554 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20555 SmallVector<Value *, 10> Values = {Dst};
20556 for (unsigned i = 0; i < II.NumResults; ++i) {
20557 Value *V = Builder.CreateAlignedLoad(
20558 Src.getElementType(),
20559 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
20560 llvm::ConstantInt::get(IntTy, i)),
20561 CharUnits::fromQuantity(4));
20562 Values.push_back(Builder.CreateBitCast(V, ParamType));
20563 }
20564 Values.push_back(Ldm);
20565 Value *Result = Builder.CreateCall(Intrinsic, Values);
20566 return Result;
20567 }
20568
20569 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20570 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
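// The layout argument (0-3) picks the row/col-major combination for the A and B
// fragments, and satf selects the saturating variant where one exists.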
20571 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20572 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20573 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20574 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20575 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20576 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20577 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20578 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20579 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20580 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20581 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20582 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20583 case NVPTX::BI__imma_m16n16k16_mma_s8:
20584 case NVPTX::BI__imma_m16n16k16_mma_u8:
20585 case NVPTX::BI__imma_m32n8k16_mma_s8:
20586 case NVPTX::BI__imma_m32n8k16_mma_u8:
20587 case NVPTX::BI__imma_m8n32k16_mma_s8:
20588 case NVPTX::BI__imma_m8n32k16_mma_u8:
20589 case NVPTX::BI__imma_m8n8k32_mma_s4:
20590 case NVPTX::BI__imma_m8n8k32_mma_u4:
20591 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20592 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20593 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20594 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20595 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20596 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20597 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20598 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20599 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20600 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20601 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20602 std::optional<llvm::APSInt> LayoutArg =
20603 E->getArg(4)->getIntegerConstantExpr(getContext());
20604 if (!LayoutArg)
20605 return nullptr;
20606 int Layout = LayoutArg->getSExtValue();
20607 if (Layout < 0 || Layout > 3)
20608 return nullptr;
20609 llvm::APSInt SatfArg;
20610 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20611 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20612 SatfArg = 0; // .b1 does not have satf argument.
20613 else if (std::optional<llvm::APSInt> OptSatfArg =
20614 E->getArg(5)->getIntegerConstantExpr(getContext()))
20615 SatfArg = *OptSatfArg;
20616 else
20617 return nullptr;
20618 bool Satf = SatfArg.getSExtValue();
20619 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20620 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20621 if (IID == 0) // Unsupported combination of Layout/Satf.
20622 return nullptr;
20623
20624 SmallVector<Value *, 24> Values;
20625 Function *Intrinsic = CGM.getIntrinsic(IID);
20626 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20627 // Load A
20628 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20629 Value *V = Builder.CreateAlignedLoad(
20630 SrcA.getElementType(),
20631 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
20632 llvm::ConstantInt::get(IntTy, i)),
20633 CharUnits::fromQuantity(4));
20634 Values.push_back(Builder.CreateBitCast(V, AType));
20635 }
20636 // Load B
20637 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20638 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20639 Value *V = Builder.CreateAlignedLoad(
20640 SrcB.getElementType(),
20641 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
20642 llvm::ConstantInt::get(IntTy, i)),
20643 CharUnits::fromQuantity(4));
20644 Values.push_back(Builder.CreateBitCast(V, BType));
20645 }
20646 // Load C
20647 llvm::Type *CType =
20648 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20649 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20650 Value *V = Builder.CreateAlignedLoad(
20651 SrcC.getElementType(),
20652 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
20653 llvm::ConstantInt::get(IntTy, i)),
20654 CharUnits::fromQuantity(4));
20655 Values.push_back(Builder.CreateBitCast(V, CType));
20656 }
20657 Value *Result = Builder.CreateCall(Intrinsic, Values);
20658 llvm::Type *DType = Dst.getElementType();
20659 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20660 Builder.CreateAlignedStore(
20661 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20662 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20663 llvm::ConstantInt::get(IntTy, i)),
20664 CharUnits::fromQuantity(4));
20665 return Result;
20666 }
20667 // The following builtins require half type support
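// (Each of these goes through MakeHalfType, which is expected to check for
// native half-type support before emitting the intrinsic.)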
20668 case NVPTX::BI__nvvm_ex2_approx_f16:
20669 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20670 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20671 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20672 case NVPTX::BI__nvvm_ff2f16x2_rn:
20673 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20674 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20675 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20676 case NVPTX::BI__nvvm_ff2f16x2_rz:
20677 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20678 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20679 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20680 case NVPTX::BI__nvvm_fma_rn_f16:
20681 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20682 case NVPTX::BI__nvvm_fma_rn_f16x2:
20683 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20684 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20685 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20686 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20687 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20688 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20689 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20690 *this);
20691 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20692 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20693 *this);
20694 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20695 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20696 *this);
20697 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20698 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20699 *this);
20700 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20701 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20702 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20703 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20704 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20705 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20706 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20707 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20708 case NVPTX::BI__nvvm_fmax_f16:
20709 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20710 case NVPTX::BI__nvvm_fmax_f16x2:
20711 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20712 case NVPTX::BI__nvvm_fmax_ftz_f16:
20713 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20714 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20715 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20716 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20717 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20718 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20719 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20720 *this);
20721 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20722 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20723 E, *this);
20724 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20725 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20726 BuiltinID, E, *this);
20727 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20728 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20729 *this);
20730 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20731 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20732 E, *this);
20733 case NVPTX::BI__nvvm_fmax_nan_f16:
20734 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20735 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20736 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20737 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20738 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20739 *this);
20740 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20741 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20742 E, *this);
20743 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20744 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20745 *this);
20746 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20747 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20748 *this);
20749 case NVPTX::BI__nvvm_fmin_f16:
20750 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20751 case NVPTX::BI__nvvm_fmin_f16x2:
20752 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20753 case NVPTX::BI__nvvm_fmin_ftz_f16:
20754 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20755 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20756 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20757 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20758 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20759 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20760 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20761 *this);
20762 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20763 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20764 E, *this);
20765 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20766 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20767 BuiltinID, E, *this);
20768 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20769 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20770 *this);
20771 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20772 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20773 E, *this);
20774 case NVPTX::BI__nvvm_fmin_nan_f16:
20775 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20776 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20777 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20778 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20779 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20780 *this);
20781 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20782 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20783 E, *this);
20784 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20785 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20786 *this);
20787 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20788 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20789 *this);
20790 case NVPTX::BI__nvvm_ldg_h:
20791 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20792 case NVPTX::BI__nvvm_ldg_h2:
20793 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20794 case NVPTX::BI__nvvm_ldu_h:
20795 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20796 case NVPTX::BI__nvvm_ldu_h2: {
20797 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20798 }
20799 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20800 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20801 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20802 4);
20803 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20804 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20805 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20806 8);
20807 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20808 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20809 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20810 16);
20811 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20812 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20813 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20814 16);
20815 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20816 return Builder.CreateCall(
20817 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20818 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20819 return Builder.CreateCall(
20820 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20821 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20822 return Builder.CreateCall(
20823 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20824 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20825 return Builder.CreateCall(
20826 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20827 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20828 return Builder.CreateCall(
20829 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20830 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20831 return Builder.CreateCall(
20832 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20833 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20834 return Builder.CreateCall(
20835 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20836 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20837 return Builder.CreateCall(
20838 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20839 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20840 return Builder.CreateCall(
20841 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20842 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20843 return Builder.CreateCall(
20844 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20845 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20846 return Builder.CreateCall(
20847 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20848 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20849 return Builder.CreateCall(
20850 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20851 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20852 return Builder.CreateCall(
20853 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20854 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20855 return Builder.CreateCall(
20856 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20857 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20858 return Builder.CreateCall(
20859 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20860 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20861 return Builder.CreateCall(
20862 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20863 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20864 return Builder.CreateCall(
20865 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20866 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20867 return Builder.CreateCall(
20868 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20869 case NVPTX::BI__nvvm_is_explicit_cluster:
20870 return Builder.CreateCall(
20871 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20872 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20873 return Builder.CreateCall(
20874 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20875 EmitScalarExpr(E->getArg(0)));
20876 case NVPTX::BI__nvvm_mapa:
20877 return Builder.CreateCall(
20878 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20879 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20880 case NVPTX::BI__nvvm_mapa_shared_cluster:
20881 return Builder.CreateCall(
20882 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20883 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20884 case NVPTX::BI__nvvm_getctarank:
20885 return Builder.CreateCall(
20886 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
20887 EmitScalarExpr(E->getArg(0)));
20888 case NVPTX::BI__nvvm_getctarank_shared_cluster:
20889 return Builder.CreateCall(
20890 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
20891 EmitScalarExpr(E->getArg(0)));
20892 case NVPTX::BI__nvvm_barrier_cluster_arrive:
20893 return Builder.CreateCall(
20894 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
20895 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
20896 return Builder.CreateCall(
20897 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
20898 case NVPTX::BI__nvvm_barrier_cluster_wait:
20899 return Builder.CreateCall(
20900 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
20901 case NVPTX::BI__nvvm_fence_sc_cluster:
20902 return Builder.CreateCall(
20903 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
20904 default:
20905 return nullptr;
20906 }
20907}
20908
20909namespace {
20910struct BuiltinAlignArgs {
20911 llvm::Value *Src = nullptr;
20912 llvm::Type *SrcType = nullptr;
20913 llvm::Value *Alignment = nullptr;
20914 llvm::Value *Mask = nullptr;
20915 llvm::IntegerType *IntType = nullptr;
20916
20917 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
20918 QualType AstType = E->getArg(0)->getType();
20919 if (AstType->isArrayType())
20920 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
20921 else
20922 Src = CGF.EmitScalarExpr(E->getArg(0));
20923 SrcType = Src->getType();
20924 if (SrcType->isPointerTy()) {
20925 IntType = IntegerType::get(
20926 CGF.getLLVMContext(),
20927 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
20928 } else {
20929 assert(SrcType->isIntegerTy());
20930 IntType = cast<llvm::IntegerType>(SrcType);
20931 }
20932 Alignment = CGF.EmitScalarExpr(E->getArg(1));
20933 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
20934 auto *One = llvm::ConstantInt::get(IntType, 1);
20935 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
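// E.g. an alignment of 16 yields Mask == 0xF, covering exactly the low bits
// that must be zero for an aligned value.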
20936 }
20937};
20938} // namespace
20939
20940/// Generate (x & (y-1)) == 0.
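/// E.g. __builtin_is_aligned(p, 16) amounts to (addr & 15) == 0 on the
/// pointer's integer representation.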
20941 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
20942 BuiltinAlignArgs Args(E, *this);
20943 llvm::Value *SrcAddress = Args.Src;
20944 if (Args.SrcType->isPointerTy())
20945 SrcAddress =
20946 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
20947 return RValue::get(Builder.CreateICmpEQ(
20948 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
20949 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
20950}
20951
20952/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
20953/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
20954/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
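/// E.g. aligning 0x1005 up to 16: mask = 15, 0x1005 + 15 = 0x1014, and
/// 0x1014 & ~15 = 0x1010.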
20955 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
20956 BuiltinAlignArgs Args(E, *this);
20957 llvm::Value *SrcForMask = Args.Src;
20958 if (AlignUp) {
20959 // When aligning up we have to first add the mask to ensure we go over the
20960 // next alignment value and then align down to the next valid multiple.
20961 // By adding the mask, we ensure that align_up on an already aligned
20962 // value will not change the value.
20963 if (Args.Src->getType()->isPointerTy()) {
20964 if (getLangOpts().isSignedOverflowDefined())
20965 SrcForMask =
20966 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
20967 else
20968 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20969 /*SignedIndices=*/true,
20970 /*isSubtraction=*/false,
20971 E->getExprLoc(), "over_boundary");
20972 } else {
20973 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
20974 }
20975 }
20976 // Invert the mask to only clear the lower bits.
20977 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
20978 llvm::Value *Result = nullptr;
20979 if (Args.Src->getType()->isPointerTy()) {
20980 Result = Builder.CreateIntrinsic(
20981 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20982 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20983 } else {
20984 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
20985 }
20986 assert(Result->getType() == Args.SrcType);
20987 return RValue::get(Result);
20988}
20989
20990 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20991 const CallExpr *E) {
20992 switch (BuiltinID) {
20993 case WebAssembly::BI__builtin_wasm_memory_size: {
20994 llvm::Type *ResultType = ConvertType(E->getType());
20995 Value *I = EmitScalarExpr(E->getArg(0));
20996 Function *Callee =
20997 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20998 return Builder.CreateCall(Callee, I);
20999 }
21000 case WebAssembly::BI__builtin_wasm_memory_grow: {
21001 llvm::Type *ResultType = ConvertType(E->getType());
21002 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
21003 EmitScalarExpr(E->getArg(1))};
21004 Function *Callee =
21005 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
21006 return Builder.CreateCall(Callee, Args);
21007 }
21008 case WebAssembly::BI__builtin_wasm_tls_size: {
21009 llvm::Type *ResultType = ConvertType(E->getType());
21010 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
21011 return Builder.CreateCall(Callee);
21012 }
21013 case WebAssembly::BI__builtin_wasm_tls_align: {
21014 llvm::Type *ResultType = ConvertType(E->getType());
21015 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
21016 return Builder.CreateCall(Callee);
21017 }
21018 case WebAssembly::BI__builtin_wasm_tls_base: {
21019 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
21020 return Builder.CreateCall(Callee);
21021 }
21022 case WebAssembly::BI__builtin_wasm_throw: {
21023 Value *Tag = EmitScalarExpr(E->getArg(0));
21024 Value *Obj = EmitScalarExpr(E->getArg(1));
21025 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
21026 return Builder.CreateCall(Callee, {Tag, Obj});
21027 }
21028 case WebAssembly::BI__builtin_wasm_rethrow: {
21029 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
21030 return Builder.CreateCall(Callee);
21031 }
21032 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
21033 Value *Addr = EmitScalarExpr(E->getArg(0));
21034 Value *Expected = EmitScalarExpr(E->getArg(1));
21035 Value *Timeout = EmitScalarExpr(E->getArg(2));
21036 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
21037 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
21038 }
21039 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
21040 Value *Addr = EmitScalarExpr(E->getArg(0));
21041 Value *Expected = EmitScalarExpr(E->getArg(1));
21042 Value *Timeout = EmitScalarExpr(E->getArg(2));
21043 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
21044 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
21045 }
21046 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
21047 Value *Addr = EmitScalarExpr(E->getArg(0));
21048 Value *Count = EmitScalarExpr(E->getArg(1));
21049 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
21050 return Builder.CreateCall(Callee, {Addr, Count});
21051 }
21052 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
21053 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
21054 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
21055 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
21056 Value *Src = EmitScalarExpr(E->getArg(0));
21057 llvm::Type *ResT = ConvertType(E->getType());
21058 Function *Callee =
21059 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
21060 return Builder.CreateCall(Callee, {Src});
21061 }
21062 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
21063 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
21064 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
21065 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
21066 Value *Src = EmitScalarExpr(E->getArg(0));
21067 llvm::Type *ResT = ConvertType(E->getType());
21068 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
21069 {ResT, Src->getType()});
21070 return Builder.CreateCall(Callee, {Src});
21071 }
21072 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
21073 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
21074 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
21075 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
21076 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
21077 Value *Src = EmitScalarExpr(E->getArg(0));
21078 llvm::Type *ResT = ConvertType(E->getType());
21079 Function *Callee =
21080 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
21081 return Builder.CreateCall(Callee, {Src});
21082 }
21083 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
21084 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
21085 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
21086 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
21087 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
21088 Value *Src = EmitScalarExpr(E->getArg(0));
21089 llvm::Type *ResT = ConvertType(E->getType());
21090 Function *Callee =
21091 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
21092 return Builder.CreateCall(Callee, {Src});
21093 }
21094 case WebAssembly::BI__builtin_wasm_min_f32:
21095 case WebAssembly::BI__builtin_wasm_min_f64:
21096 case WebAssembly::BI__builtin_wasm_min_f16x8:
21097 case WebAssembly::BI__builtin_wasm_min_f32x4:
21098 case WebAssembly::BI__builtin_wasm_min_f64x2: {
21099 Value *LHS = EmitScalarExpr(E->getArg(0));
21100 Value *RHS = EmitScalarExpr(E->getArg(1));
21101 Function *Callee =
21102 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
21103 return Builder.CreateCall(Callee, {LHS, RHS});
21104 }
21105 case WebAssembly::BI__builtin_wasm_max_f32:
21106 case WebAssembly::BI__builtin_wasm_max_f64:
21107 case WebAssembly::BI__builtin_wasm_max_f16x8:
21108 case WebAssembly::BI__builtin_wasm_max_f32x4:
21109 case WebAssembly::BI__builtin_wasm_max_f64x2: {
21110 Value *LHS = EmitScalarExpr(E->getArg(0));
21111 Value *RHS = EmitScalarExpr(E->getArg(1));
21112 Function *Callee =
21113 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
21114 return Builder.CreateCall(Callee, {LHS, RHS});
21115 }
21116 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
21117 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
21118 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
21119 Value *LHS = EmitScalarExpr(E->getArg(0));
21120 Value *RHS = EmitScalarExpr(E->getArg(1));
21121 Function *Callee =
21122 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
21123 return Builder.CreateCall(Callee, {LHS, RHS});
21124 }
21125 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
21126 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
21127 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
21128 Value *LHS = EmitScalarExpr(E->getArg(0));
21129 Value *RHS = EmitScalarExpr(E->getArg(1));
21130 Function *Callee =
21131 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
21132 return Builder.CreateCall(Callee, {LHS, RHS});
21133 }
21134 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21135 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21136 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21137 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21138 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21139 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21140 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21141 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
21142 unsigned IntNo;
21143 switch (BuiltinID) {
21144 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21145 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21146 IntNo = Intrinsic::ceil;
21147 break;
21148 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21149 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21150 IntNo = Intrinsic::floor;
21151 break;
21152 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21153 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21154 IntNo = Intrinsic::trunc;
21155 break;
21156 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21157 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
21158 IntNo = Intrinsic::nearbyint;
21159 break;
21160 default:
21161 llvm_unreachable("unexpected builtin ID");
21162 }
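// These lower to the generic LLVM rounding intrinsics (ceil, floor, trunc,
// nearbyint) rather than to WebAssembly-specific ones.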
21163 Value *Value = EmitScalarExpr(E->getArg(0));
21164 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21165 return Builder.CreateCall(Callee, Value);
21166 }
21167 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
21168 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
21169 return Builder.CreateCall(Callee);
21170 }
21171 case WebAssembly::BI__builtin_wasm_ref_null_func: {
21172 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
21173 return Builder.CreateCall(Callee);
21174 }
21175 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
21176 Value *Src = EmitScalarExpr(E->getArg(0));
21177 Value *Indices = EmitScalarExpr(E->getArg(1));
21178 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
21179 return Builder.CreateCall(Callee, {Src, Indices});
21180 }
21181 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21182 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21183 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21184 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21185 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21186 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21187 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21188 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
21189 unsigned IntNo;
21190 switch (BuiltinID) {
21191 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21192 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21193 IntNo = Intrinsic::sadd_sat;
21194 break;
21195 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21196 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21197 IntNo = Intrinsic::uadd_sat;
21198 break;
21199 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21200 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21201 IntNo = Intrinsic::wasm_sub_sat_signed;
21202 break;
21203 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21204 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
21205 IntNo = Intrinsic::wasm_sub_sat_unsigned;
21206 break;
21207 default:
21208 llvm_unreachable("unexpected builtin ID");
21209 }
21210 Value *LHS = EmitScalarExpr(E->getArg(0));
21211 Value *RHS = EmitScalarExpr(E->getArg(1));
21212 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21213 return Builder.CreateCall(Callee, {LHS, RHS});
21214 }
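// Integer vector abs is expanded inline as select(x < 0, -x, x).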
21215 case WebAssembly::BI__builtin_wasm_abs_i8x16:
21216 case WebAssembly::BI__builtin_wasm_abs_i16x8:
21217 case WebAssembly::BI__builtin_wasm_abs_i32x4:
21218 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
21219 Value *Vec = EmitScalarExpr(E->getArg(0));
21220 Value *Neg = Builder.CreateNeg(Vec, "neg");
21221 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
21222 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
21223 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
21224 }
21225 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21226 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21227 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21228 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21229 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21230 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21231 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21232 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21233 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21234 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21235 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21236 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
21237 Value *LHS = EmitScalarExpr(E->getArg(0));
21238 Value *RHS = EmitScalarExpr(E->getArg(1));
21239 Value *ICmp;
21240 switch (BuiltinID) {
21241 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21242 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21243 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21244 ICmp = Builder.CreateICmpSLT(LHS, RHS);
21245 break;
21246 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21247 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21248 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21249 ICmp = Builder.CreateICmpULT(LHS, RHS);
21250 break;
21251 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21252 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21253 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21254 ICmp = Builder.CreateICmpSGT(LHS, RHS);
21255 break;
21256 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21257 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21258 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
21259 ICmp = Builder.CreateICmpUGT(LHS, RHS);
21260 break;
21261 default:
21262 llvm_unreachable("unexpected builtin ID");
21263 }
21264 return Builder.CreateSelect(ICmp, LHS, RHS);
21265 }
21266 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
21267 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
21268 Value *LHS = EmitScalarExpr(E->getArg(0));
21269 Value *RHS = EmitScalarExpr(E->getArg(1));
21270 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
21271 ConvertType(E->getType()));
21272 return Builder.CreateCall(Callee, {LHS, RHS});
21273 }
21274 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
21275 Value *LHS = EmitScalarExpr(E->getArg(0));
21276 Value *RHS = EmitScalarExpr(E->getArg(1));
21277 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
21278 return Builder.CreateCall(Callee, {LHS, RHS});
21279 }
21280 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21281 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21282 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21283 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
21284 Value *Vec = EmitScalarExpr(E->getArg(0));
21285 unsigned IntNo;
21286 switch (BuiltinID) {
21287 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21288 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21289 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
21290 break;
21291 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21292 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
21293 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
21294 break;
21295 default:
21296 llvm_unreachable("unexpected builtin ID");
21297 }
21298
21299 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21300 return Builder.CreateCall(Callee, Vec);
21301 }
21302 case WebAssembly::BI__builtin_wasm_bitselect: {
21303 Value *V1 = EmitScalarExpr(E->getArg(0));
21304 Value *V2 = EmitScalarExpr(E->getArg(1));
21305 Value *C = EmitScalarExpr(E->getArg(2));
21306 Function *Callee =
21307 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
21308 return Builder.CreateCall(Callee, {V1, V2, C});
21309 }
21310 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
21311 Value *LHS = EmitScalarExpr(E->getArg(0));
21312 Value *RHS = EmitScalarExpr(E->getArg(1));
21313 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
21314 return Builder.CreateCall(Callee, {LHS, RHS});
21315 }
21316 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
21317 Value *Vec = EmitScalarExpr(E->getArg(0));
21318 Function *Callee =
21319 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21320 return Builder.CreateCall(Callee, {Vec});
21321 }
21322 case WebAssembly::BI__builtin_wasm_any_true_v128:
21323 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21324 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21325 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21326 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21327 unsigned IntNo;
21328 switch (BuiltinID) {
21329 case WebAssembly::BI__builtin_wasm_any_true_v128:
21330 IntNo = Intrinsic::wasm_anytrue;
21331 break;
21332 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21333 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21334 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21335 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21336 IntNo = Intrinsic::wasm_alltrue;
21337 break;
21338 default:
21339 llvm_unreachable("unexpected builtin ID");
21340 }
21341 Value *Vec = EmitScalarExpr(E->getArg(0));
21342 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
21343 return Builder.CreateCall(Callee, {Vec});
21344 }
21345 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21346 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21347 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21348 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21349 Value *Vec = EmitScalarExpr(E->getArg(0));
21350 Function *Callee =
21351 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21352 return Builder.CreateCall(Callee, {Vec});
21353 }
21354 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21355 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21356 Value *Vec = EmitScalarExpr(E->getArg(0));
21357 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21358 return Builder.CreateCall(Callee, {Vec});
21359 }
21360 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21361 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21362 Value *Vec = EmitScalarExpr(E->getArg(0));
21363 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21364 return Builder.CreateCall(Callee, {Vec});
21365 }
21366 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21367 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21368 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21369 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21370 Value *Low = EmitScalarExpr(E->getArg(0));
21371 Value *High = EmitScalarExpr(E->getArg(1));
21372 unsigned IntNo;
21373 switch (BuiltinID) {
21374 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21375 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21376 IntNo = Intrinsic::wasm_narrow_signed;
21377 break;
21378 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21379 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21380 IntNo = Intrinsic::wasm_narrow_unsigned;
21381 break;
21382 default:
21383 llvm_unreachable("unexpected builtin ID");
21384 }
21385 Function *Callee =
21386 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
21387 return Builder.CreateCall(Callee, {Low, High});
21388 }
21389 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21390 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21391 Value *Vec = EmitScalarExpr(E->getArg(0));
21392 unsigned IntNo;
21393 switch (BuiltinID) {
21394 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21395 IntNo = Intrinsic::fptosi_sat;
21396 break;
21397 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21398 IntNo = Intrinsic::fptoui_sat;
21399 break;
21400 default:
21401 llvm_unreachable("unexpected builtin ID");
21402 }
21403 llvm::Type *SrcT = Vec->getType();
21404 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
21405 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
21406 Value *Trunc = Builder.CreateCall(Callee, Vec);
21407 Value *Splat = Constant::getNullValue(TruncT);
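// Widen the <2 x i32> truncation result to <4 x i32>, zero-filling lanes 2-3.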
21408 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
21409 }
21410 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
21411 Value *Ops[18];
21412 size_t OpIdx = 0;
21413 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
21414 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
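// The remaining 16 operands are lane indices and must be integer constants.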
21415 while (OpIdx < 18) {
21416 std::optional<llvm::APSInt> LaneConst =
21417 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
21418 assert(LaneConst && "Constant arg isn't actually constant?");
21419 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
21420 }
21421 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21422 return Builder.CreateCall(Callee, Ops);
21423 }
21424 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21425 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21426 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21427 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21428 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21429 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21430 Value *A = EmitScalarExpr(E->getArg(0));
21431 Value *B = EmitScalarExpr(E->getArg(1));
21432 Value *C = EmitScalarExpr(E->getArg(2));
21433 unsigned IntNo;
21434 switch (BuiltinID) {
21435 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21436 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21437 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21438 IntNo = Intrinsic::wasm_relaxed_madd;
21439 break;
21440 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21441 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21442 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21443 IntNo = Intrinsic::wasm_relaxed_nmadd;
21444 break;
21445 default:
21446 llvm_unreachable("unexpected builtin ID");
21447 }
21448 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
21449 return Builder.CreateCall(Callee, {A, B, C});
21450 }
21451 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21452 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21453 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21454 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21455 Value *A = EmitScalarExpr(E->getArg(0));
21456 Value *B = EmitScalarExpr(E->getArg(1));
21457 Value *C = EmitScalarExpr(E->getArg(2));
21458 Function *Callee =
21459 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21460 return Builder.CreateCall(Callee, {A, B, C});
21461 }
21462 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21463 Value *Src = EmitScalarExpr(E->getArg(0));
21464 Value *Indices = EmitScalarExpr(E->getArg(1));
21465 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21466 return Builder.CreateCall(Callee, {Src, Indices});
21467 }
21468 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21469 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21470 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21471 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21472 Value *LHS = EmitScalarExpr(E->getArg(0));
21473 Value *RHS = EmitScalarExpr(E->getArg(1));
21474 unsigned IntNo;
21475 switch (BuiltinID) {
21476 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21477 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21478 IntNo = Intrinsic::wasm_relaxed_min;
21479 break;
21480 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21481 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21482 IntNo = Intrinsic::wasm_relaxed_max;
21483 break;
21484 default:
21485 llvm_unreachable("unexpected builtin ID");
21486 }
21487 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21488 return Builder.CreateCall(Callee, {LHS, RHS});
21489 }
21490 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21491 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21492 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21493 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21494 Value *Vec = EmitScalarExpr(E->getArg(0));
21495 unsigned IntNo;
21496 switch (BuiltinID) {
21497 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21498 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21499 break;
21500 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21501 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21502 break;
21503 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21504 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21505 break;
21506 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21507 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21508 break;
21509 default:
21510 llvm_unreachable("unexpected builtin ID");
21511 }
21512 Function *Callee = CGM.getIntrinsic(IntNo);
21513 return Builder.CreateCall(Callee, {Vec});
21514 }
21515 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21516 Value *LHS = EmitScalarExpr(E->getArg(0));
21517 Value *RHS = EmitScalarExpr(E->getArg(1));
21518 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21519 return Builder.CreateCall(Callee, {LHS, RHS});
21520 }
21521 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21522 Value *LHS = EmitScalarExpr(E->getArg(0));
21523 Value *RHS = EmitScalarExpr(E->getArg(1));
21524 Function *Callee =
21525 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21526 return Builder.CreateCall(Callee, {LHS, RHS});
21527 }
21528 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21529 Value *LHS = EmitScalarExpr(E->getArg(0));
21530 Value *RHS = EmitScalarExpr(E->getArg(1));
21531 Value *Acc = EmitScalarExpr(E->getArg(2));
21532 Function *Callee =
21533 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21534 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21535 }
21536 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21537 Value *LHS = EmitScalarExpr(E->getArg(0));
21538 Value *RHS = EmitScalarExpr(E->getArg(1));
21539 Value *Acc = EmitScalarExpr(E->getArg(2));
21540 Function *Callee =
21541 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21542 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21543 }
21544 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
21545 Value *Addr = EmitScalarExpr(E->getArg(0));
21546 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
21547 return Builder.CreateCall(Callee, {Addr});
21548 }
21549 case WebAssembly::BI__builtin_wasm_storef16_f32: {
21550 Value *Val = EmitScalarExpr(E->getArg(0));
21551 Value *Addr = EmitScalarExpr(E->getArg(1));
21552 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
21553 return Builder.CreateCall(Callee, {Val, Addr});
21554 }
21555 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
21556 Value *Val = EmitScalarExpr(E->getArg(0));
21557 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
21558 return Builder.CreateCall(Callee, {Val});
21559 }
21560 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
21561 Value *Vector = EmitScalarExpr(E->getArg(0));
21562 Value *Index = EmitScalarExpr(E->getArg(1));
21563 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
21564 return Builder.CreateCall(Callee, {Vector, Index});
21565 }
21566 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
21567 Value *Vector = EmitScalarExpr(E->getArg(0));
21568 Value *Index = EmitScalarExpr(E->getArg(1));
21569 Value *Val = EmitScalarExpr(E->getArg(2));
21570 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
21571 return Builder.CreateCall(Callee, {Vector, Index, Val});
21572 }
21573 case WebAssembly::BI__builtin_wasm_table_get: {
21574 assert(E->getArg(0)->getType()->isArrayType());
21575 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21576 Value *Index = EmitScalarExpr(E->getArg(1));
21577 Function *Callee;
21578 if (E->getType().isWebAssemblyExternrefType())
21579 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21580 else if (E->getType().isWebAssemblyFuncrefType())
21581 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21582 else
21583 llvm_unreachable(
21584 "Unexpected reference type for __builtin_wasm_table_get");
21585 return Builder.CreateCall(Callee, {Table, Index});
21586 }
21587 case WebAssembly::BI__builtin_wasm_table_set: {
21588 assert(E->getArg(0)->getType()->isArrayType());
21589 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21590 Value *Index = EmitScalarExpr(E->getArg(1));
21591 Value *Val = EmitScalarExpr(E->getArg(2));
21592 Function *Callee;
21593 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21594 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21595 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21596 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21597 else
21598 llvm_unreachable(
21599 "Unexpected reference type for __builtin_wasm_table_set");
21600 return Builder.CreateCall(Callee, {Table, Index, Val});
21601 }
21602 case WebAssembly::BI__builtin_wasm_table_size: {
21603 assert(E->getArg(0)->getType()->isArrayType());
21604 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21605 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21606 return Builder.CreateCall(Callee, Value);
21607 }
21608 case WebAssembly::BI__builtin_wasm_table_grow: {
21609 assert(E->getArg(0)->getType()->isArrayType());
21610 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21611 Value *Val = EmitScalarExpr(E->getArg(1));
21612 Value *NElems = EmitScalarExpr(E->getArg(2));
21613
21614 Function *Callee;
21615 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21616 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21617 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21618 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21619 else
21620 llvm_unreachable(
21621 "Unexpected reference type for __builtin_wasm_table_grow");
21622
21623 return Builder.CreateCall(Callee, {Table, Val, NElems});
21624 }
21625 case WebAssembly::BI__builtin_wasm_table_fill: {
21626 assert(E->getArg(0)->getType()->isArrayType());
21627 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21628 Value *Index = EmitScalarExpr(E->getArg(1));
21629 Value *Val = EmitScalarExpr(E->getArg(2));
21630 Value *NElems = EmitScalarExpr(E->getArg(3));
21631
21632 Function *Callee;
21633 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21634 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21635 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21636 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21637 else
21638 llvm_unreachable(
21639 "Unexpected reference type for __builtin_wasm_table_fill");
21640
21641 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21642 }
21643 case WebAssembly::BI__builtin_wasm_table_copy: {
21644 assert(E->getArg(0)->getType()->isArrayType());
21645 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21646 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
21647 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21648 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21649 Value *NElems = EmitScalarExpr(E->getArg(4));
21650
21651 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21652
21653 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21654 }
21655 default:
21656 return nullptr;
21657 }
21658}
21659
21660static std::pair<Intrinsic::ID, unsigned>
21661 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21662 struct Info {
21663 unsigned BuiltinID;
21664 Intrinsic::ID IntrinsicID;
21665 unsigned VecLen;
21666 };
21667 static Info Infos[] = {
21668#define CUSTOM_BUILTIN_MAPPING(x,s) \
21669 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21670 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21671 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21672 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21673 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21674 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21675 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21676 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21677 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21678 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21679 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21680 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21681 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21682 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21683 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21684 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21685 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21686 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21687 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21688 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21689 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21690 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21691 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21692 // Legacy builtins that take a vector in place of a vector predicate.
21693 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21694 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21695 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21696 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21697 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21698 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21699 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21700 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21701#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21702#undef CUSTOM_BUILTIN_MAPPING
21703 };
21704
21705 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21706 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21707 (void)SortOnce;
21708
21709 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21710 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21711 return {Intrinsic::not_intrinsic, 0};
21712
21713 return {F->IntrinsicID, F->VecLen};
21714}
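(Illustration, not part of this file.) The helper above keeps the builtin-to-intrinsic mapping in a static array that is sorted exactly once, on first use, and then queried with a binary search. A self-contained C sketch of the same pattern with hypothetical IDs; the real code uses llvm::sort and llvm::lower_bound, while plain qsort and bsearch stand in here:

#include <stdlib.h>

struct Mapping { unsigned BuiltinID, IntrinsicID, VecLen; };

static int cmp_mapping(const void *A, const void *B) {
  const struct Mapping *MA = A, *MB = B;
  return (MA->BuiltinID > MB->BuiltinID) - (MA->BuiltinID < MB->BuiltinID);
}

static void lookup(unsigned BuiltinID, unsigned *IntrinsicID, unsigned *VecLen) {
  static struct Mapping Table[] = {
      {30, 300, 0}, {10, 100, 0}, {20, 200, 64}, /* deliberately unsorted */
  };
  static int Sorted = 0;
  if (!Sorted) { /* sorted exactly once, on the first call */
    qsort(Table, sizeof(Table) / sizeof(Table[0]), sizeof(Table[0]),
          cmp_mapping);
    Sorted = 1;
  }
  struct Mapping Key = {BuiltinID, 0, 0};
  const struct Mapping *F = bsearch(&Key, Table,
                                    sizeof(Table) / sizeof(Table[0]),
                                    sizeof(Table[0]), cmp_mapping);
  *IntrinsicID = F ? F->IntrinsicID : 0; /* 0 plays the role of not_intrinsic */
  *VecLen = F ? F->VecLen : 0;
}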
21715
21716Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21717 const CallExpr *E) {
21718 Intrinsic::ID ID;
21719 unsigned VecLen;
21720 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21721
21722 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21723 // The base pointer is passed by address, so it needs to be loaded.
21724 Address A = EmitPointerWithAlignment(E->getArg(0));
21725 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
21726 llvm::Value *Base = Builder.CreateLoad(BP);
21727 // The treatment of both loads and stores is the same: the arguments for
21728 // the builtin are the same as the arguments for the intrinsic.
21729 // Load:
21730 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21731 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21732 // Store:
21733 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21734 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21735 SmallVector<llvm::Value*,5> Ops = { Base };
21736 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21737 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21738
21739 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21740 // The load intrinsics generate two results (Value, NewBase), stores
21741 // generate one (NewBase). The new base address needs to be stored.
21742 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21743 : Result;
21744 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21745 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21746 llvm::Value *RetVal =
21747 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21748 if (IsLoad)
21749 RetVal = Builder.CreateExtractValue(Result, 0);
21750 return RetVal;
21751 };
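(Illustration, not part of this file.) At the source level, the circular-addressing builtins routed through MakeCircOp take the base pointer by address: the builtin reads it, performs the access, and writes the post-modified base back, which is exactly the load/store-of-NewBase sequence above. A hedged sketch of a caller, with hypothetical variable names and a made-up modifier value; the authoritative prototypes are the ones declared for these Hexagon builtins in clang:

/* Hypothetical sketch of a circular word load: *basep is advanced by Inc
   within the circular buffer described by mod/start, and the loaded word is
   returned (the "Value" half of the {Value, NewBase} intrinsic result). */
int circ_read_word(int **basep, int mod, int *start) {
  return __builtin_HEXAGON_L2_loadri_pci(basep, /*Inc=*/4, mod, start);
}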
21752
21753 // Handle the conversion of bit-reverse load intrinsics to bit code.
21754 // The intrinsic call after this function only reads from memory and the
21755 // write to memory is handled by the store instruction.
21756 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21757 // The intrinsic generates one result, which is the new value for the base
21758 // pointer. It needs to be returned. The result of the load instruction is
21759 // passed to the intrinsic by address, so the value needs to be stored.
21760 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21761
21762 // Expressions like &(*pt++) will be incremented per evaluation.
21763 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
21764 // per call.
21765 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21766 DestAddr = DestAddr.withElementType(Int8Ty);
21767 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
21768
21769 // Operands are Base, Dest, Modifier.
21770 // The intrinsic format in LLVM IR is defined as
21771 // { ValueType, i8* } (i8*, i32).
21772 llvm::Value *Result = Builder.CreateCall(
21773 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21774
21775 // The value needs to be stored as the variable is passed by reference.
21776 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21777
21778 // The store needs to be truncated to fit the destination type.
21779 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21780 // to be handled with stores of the respective destination type.
21781 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21782
21783 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21784 // The updated value of the base pointer is returned.
21785 return Builder.CreateExtractValue(Result, 1);
21786 };
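(Illustration, not part of this file.) The bit-reverse loads wrapped by MakeBrevLd take the base pointer by value, store the loaded element through their second argument, and return the updated base, matching the extractvalue/store sequence above. A hedged sketch with hypothetical names:

/* Hypothetical sketch: loads a halfword with bit-reversed addressing, writes
   it to *dst, and returns the post-updated base pointer. */
unsigned char *brev_read_h(unsigned char *base, short *dst, int mod) {
  return __builtin_brev_ldh(base, dst, mod);
}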
21787
21788 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21789 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21790 : Intrinsic::hexagon_V6_vandvrt;
21791 return Builder.CreateCall(CGM.getIntrinsic(ID),
21792 {Vec, Builder.getInt32(-1)});
21793 };
21794 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21795 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21796 : Intrinsic::hexagon_V6_vandqrt;
21797 return Builder.CreateCall(CGM.getIntrinsic(ID),
21798 {Pred, Builder.getInt32(-1)});
21799 };
21800
21801 switch (BuiltinID) {
21802 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21803 // and the corresponding C/C++ builtins use loads/stores to update
21804 // the predicate.
21805 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21806 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21807 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21808 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21809 // Get the type from the 0-th argument.
21810 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21811 Address PredAddr =
21812 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21813 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21814 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21815 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21816
21817 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21818 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21819 PredAddr.getAlignment());
21820 return Builder.CreateExtractValue(Result, 0);
21821 }
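(Illustration, not part of this file.) In source code, these carry builtins take two HVX vectors plus a pointer to a vector that holds the predicate between calls; the predicate load/V2Q on entry and Q2V/store on exit above service that third argument. A minimal sketch, assuming 128-byte HVX mode and using a local vector typedef as a stand-in for the one normally supplied by the Hexagon headers:

/* Hypothetical sketch: *carry holds the carry predicate (as a vector);
   the builtin consumes it as carry-in and writes the carry-out back. */
typedef int HVX_Vector __attribute__((__vector_size__(128), __aligned__(128)));

HVX_Vector add_with_carry(HVX_Vector a, HVX_Vector b, HVX_Vector *carry) {
  return __builtin_HEXAGON_V6_vaddcarry_128B(a, b, carry);
}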
21822 // These are identical to the builtins above, except they don't consume
21823 // input carry, only generate carry-out. Since they still produce two
21824 // outputs, generate the store of the predicate, but no load.
21825 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21826 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21827 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21828 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21829 // Get the type from the 0-th argument.
21830 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21831 Address PredAddr =
21832 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21833 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21834 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21835
21836 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21837 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21838 PredAddr.getAlignment());
21839 return Builder.CreateExtractValue(Result, 0);
21840 }
21841
21842 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21843 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21844 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21845 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21846 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21847 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21848 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21849 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21850 SmallVector<llvm::Value*,4> Ops;
21851 const Expr *PredOp = E->getArg(0);
21852 // There will be an implicit cast to a boolean vector. Strip it.
21853 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21854 if (Cast->getCastKind() == CK_BitCast)
21855 PredOp = Cast->getSubExpr();
21856 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21857 }
21858 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21859 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21860 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
21861 }
21862
21863 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21864 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21865 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21866 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21867 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21868 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21869 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21870 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21871 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21872 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21873 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21874 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21875 return MakeCircOp(ID, /*IsLoad=*/true);
21876 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21877 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21878 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21879 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21880 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21881 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21882 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21883 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
21884 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
21885 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
21886 return MakeCircOp(ID, /*IsLoad=*/false);
21887 case Hexagon::BI__builtin_brev_ldub:
21888 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
21889 case Hexagon::BI__builtin_brev_ldb:
21890 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
21891 case Hexagon::BI__builtin_brev_lduh:
21892 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
21893 case Hexagon::BI__builtin_brev_ldh:
21894 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
21895 case Hexagon::BI__builtin_brev_ldw:
21896 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
21897 case Hexagon::BI__builtin_brev_ldd:
21898 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
21899 } // switch
21900
21901 return nullptr;
21902}
21903
21904Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
21905 const CallExpr *E,
21906 ReturnValueSlot ReturnValue) {
21907
21908 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
21909 return EmitRISCVCpuSupports(E);
21910 if (BuiltinID == Builtin::BI__builtin_cpu_init)
21911 return EmitRISCVCpuInit();
21912
21913 SmallVector<Value *, 4> Ops;
21914 llvm::Type *ResultType = ConvertType(E->getType());
21915
21916 // Find out if any arguments are required to be integer constant expressions.
21917 unsigned ICEArguments = 0;
21918 ASTContext::GetBuiltinTypeError Error;
21919 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
21920 if (Error == ASTContext::GE_Missing_type) {
21921 // Vector intrinsics don't have a type string.
21922 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
21923 BuiltinID <= clang::RISCV::LastRVVBuiltin);
21924 ICEArguments = 0;
21925 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
21926 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
21927 ICEArguments = 1 << 1;
21928 } else {
21929 assert(Error == ASTContext::GE_None && "Unexpected error");
21930 }
21931
21932 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
21933 ICEArguments |= (1 << 1);
21934 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
21935 ICEArguments |= (1 << 2);
21936
21937 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
21938 // Handle aggregate arguments, namely RVV tuple types in segment loads/stores.
21939 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
21940 LValue L = EmitAggExprToLValue(E->getArg(i));
21941 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
21942 Ops.push_back(AggValue);
21943 continue;
21944 }
21945 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
21946 }
21947
21948 Intrinsic::ID ID = Intrinsic::not_intrinsic;
21949 unsigned NF = 1;
21950 // The 0th bit simulates the `vta` of RVV
21951 // The 1st bit simulates the `vma` of RVV
21952 constexpr unsigned RVV_VTA = 0x1;
21953 constexpr unsigned RVV_VMA = 0x2;
21954 int PolicyAttrs = 0;
21955 bool IsMasked = false;
21956
21957 // Required for overloaded intrinsics.
21958 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
21959 switch (BuiltinID) {
21960 default: llvm_unreachable("unexpected builtin ID");
21961 case RISCV::BI__builtin_riscv_orc_b_32:
21962 case RISCV::BI__builtin_riscv_orc_b_64:
21963 case RISCV::BI__builtin_riscv_clz_32:
21964 case RISCV::BI__builtin_riscv_clz_64:
21965 case RISCV::BI__builtin_riscv_ctz_32:
21966 case RISCV::BI__builtin_riscv_ctz_64:
21967 case RISCV::BI__builtin_riscv_clmul_32:
21968 case RISCV::BI__builtin_riscv_clmul_64:
21969 case RISCV::BI__builtin_riscv_clmulh_32:
21970 case RISCV::BI__builtin_riscv_clmulh_64:
21971 case RISCV::BI__builtin_riscv_clmulr_32:
21972 case RISCV::BI__builtin_riscv_clmulr_64:
21973 case RISCV::BI__builtin_riscv_xperm4_32:
21974 case RISCV::BI__builtin_riscv_xperm4_64:
21975 case RISCV::BI__builtin_riscv_xperm8_32:
21976 case RISCV::BI__builtin_riscv_xperm8_64:
21977 case RISCV::BI__builtin_riscv_brev8_32:
21978 case RISCV::BI__builtin_riscv_brev8_64:
21979 case RISCV::BI__builtin_riscv_zip_32:
21980 case RISCV::BI__builtin_riscv_unzip_32: {
21981 switch (BuiltinID) {
21982 default: llvm_unreachable("unexpected builtin ID");
21983 // Zbb
21984 case RISCV::BI__builtin_riscv_orc_b_32:
21985 case RISCV::BI__builtin_riscv_orc_b_64:
21986 ID = Intrinsic::riscv_orc_b;
21987 break;
21988 case RISCV::BI__builtin_riscv_clz_32:
21989 case RISCV::BI__builtin_riscv_clz_64: {
21990 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
21991 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21992 if (Result->getType() != ResultType)
21993 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21994 "cast");
21995 return Result;
21996 }
21997 case RISCV::BI__builtin_riscv_ctz_32:
21998 case RISCV::BI__builtin_riscv_ctz_64: {
21999 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
22000 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
22001 if (Result->getType() != ResultType)
22002 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
22003 "cast");
22004 return Result;
22005 }
22006
22007 // Zbc
22008 case RISCV::BI__builtin_riscv_clmul_32:
22009 case RISCV::BI__builtin_riscv_clmul_64:
22010 ID = Intrinsic::riscv_clmul;
22011 break;
22012 case RISCV::BI__builtin_riscv_clmulh_32:
22013 case RISCV::BI__builtin_riscv_clmulh_64:
22014 ID = Intrinsic::riscv_clmulh;
22015 break;
22016 case RISCV::BI__builtin_riscv_clmulr_32:
22017 case RISCV::BI__builtin_riscv_clmulr_64:
22018 ID = Intrinsic::riscv_clmulr;
22019 break;
22020
22021 // Zbkx
22022 case RISCV::BI__builtin_riscv_xperm8_32:
22023 case RISCV::BI__builtin_riscv_xperm8_64:
22024 ID = Intrinsic::riscv_xperm8;
22025 break;
22026 case RISCV::BI__builtin_riscv_xperm4_32:
22027 case RISCV::BI__builtin_riscv_xperm4_64:
22028 ID = Intrinsic::riscv_xperm4;
22029 break;
22030
22031 // Zbkb
22032 case RISCV::BI__builtin_riscv_brev8_32:
22033 case RISCV::BI__builtin_riscv_brev8_64:
22034 ID = Intrinsic::riscv_brev8;
22035 break;
22036 case RISCV::BI__builtin_riscv_zip_32:
22037 ID = Intrinsic::riscv_zip;
22038 break;
22039 case RISCV::BI__builtin_riscv_unzip_32:
22040 ID = Intrinsic::riscv_unzip;
22041 break;
22042 }
22043
22044 IntrinsicTypes = {ResultType};
22045 break;
22046 }
22047
22048 // Zk builtins
22049
22050 // Zknh
22051 case RISCV::BI__builtin_riscv_sha256sig0:
22052 ID = Intrinsic::riscv_sha256sig0;
22053 break;
22054 case RISCV::BI__builtin_riscv_sha256sig1:
22055 ID = Intrinsic::riscv_sha256sig1;
22056 break;
22057 case RISCV::BI__builtin_riscv_sha256sum0:
22058 ID = Intrinsic::riscv_sha256sum0;
22059 break;
22060 case RISCV::BI__builtin_riscv_sha256sum1:
22061 ID = Intrinsic::riscv_sha256sum1;
22062 break;
22063
22064 // Zksed
22065 case RISCV::BI__builtin_riscv_sm4ks:
22066 ID = Intrinsic::riscv_sm4ks;
22067 break;
22068 case RISCV::BI__builtin_riscv_sm4ed:
22069 ID = Intrinsic::riscv_sm4ed;
22070 break;
22071
22072 // Zksh
22073 case RISCV::BI__builtin_riscv_sm3p0:
22074 ID = Intrinsic::riscv_sm3p0;
22075 break;
22076 case RISCV::BI__builtin_riscv_sm3p1:
22077 ID = Intrinsic::riscv_sm3p1;
22078 break;
22079
22080 // Zihintntl
22081 case RISCV::BI__builtin_riscv_ntl_load: {
22082 llvm::Type *ResTy = ConvertType(E->getType());
22083 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
22084 if (Ops.size() == 2)
22085 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
22086
22087 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
22088 getLLVMContext(),
22089 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
22090 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
22091 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
22092
22093 int Width;
22094 if (ResTy->isScalableTy()) {
22095 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
22096 llvm::Type *ScalarTy = ResTy->getScalarType();
22097 Width = ScalarTy->getPrimitiveSizeInBits() *
22098 SVTy->getElementCount().getKnownMinValue();
22099 } else
22100 Width = ResTy->getPrimitiveSizeInBits();
22101 LoadInst *Load = Builder.CreateLoad(
22102 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
22103
22104 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
22105 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
22106 RISCVDomainNode);
22107
22108 return Load;
22109 }
22110 case RISCV::BI__builtin_riscv_ntl_store: {
22111 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
22112 if (Ops.size() == 3)
22113 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
22114
22115 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
22116 getLLVMContext(),
22117 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
22118 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
22119 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
22120
22121 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
22122 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
22123 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
22124 RISCVDomainNode);
22125
22126 return Store;
22127 }
22128
22129 // Vector builtins are handled from here.
22130#include "clang/Basic/riscv_vector_builtin_cg.inc"
22131 // SiFive Vector builtins are handled from here.
22132#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
22133 }
22134
22135 assert(ID != Intrinsic::not_intrinsic);
22136
22137 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
22138 return Builder.CreateCall(F, Ops, "");
22139}
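(Illustration, not part of this file.) At the source level, the Zihintntl builtins handled above take an optional locality-domain argument, defaulting to 5 (__RISCV_NTLH_ALL) as noted in the code, and the generated load/store carries that value as riscv-nontemporal-domain metadata next to the !nontemporal annotation. A hedged C sketch; the domain constant is normally supplied by the RISC-V headers and is spelled out locally here as an assumption:

/* Hypothetical sketch: stream a buffer with non-temporal accesses.
   5 stands in for __RISCV_NTLH_ALL, the default domain used above. */
enum { NTLH_ALL = 5 };

void copy_stream(int *dst, const int *src, int n) {
  for (int i = 0; i < n; ++i) {
    int v = __builtin_riscv_ntl_load(&src[i], NTLH_ALL);
    __builtin_riscv_ntl_store(&dst[i], v, NTLH_ALL);
  }
}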
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3338
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8487
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9323
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:213
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1200
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6513
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:392
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2066
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2032
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6382
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2523
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9293
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:810
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9286
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7528
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9513
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7540
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7510
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8555
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2401
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:445
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:746
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:495
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6509
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7541
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1398
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7545
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:870
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:613
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2429
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:469
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:778
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9282
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7542
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9360
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6267
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9797
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1783
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7352
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6506
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:512
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:265
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9387
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1629
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1483
Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:589
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1261
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6518
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:659
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:650
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2259
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:818
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1337
@ UnsignedAlts
Definition: CGBuiltin.cpp:6476
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6481
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6485
@ Use64BitVectors
Definition: CGBuiltin.cpp:6478
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6473
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6483
@ InventFloatType
Definition: CGBuiltin.cpp:6475
@ AddRetType
Definition: CGBuiltin.cpp:6468
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6470
@ VectorizeRetType
Definition: CGBuiltin.cpp:6472
@ VectorRet
Definition: CGBuiltin.cpp:6482
@ Add1ArgType
Definition: CGBuiltin.cpp:6469
@ Use128BitVectors
Definition: CGBuiltin.cpp:6479
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9349
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:549
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:764
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2313
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9799
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2452
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6341
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:256
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9375
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:184
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8466
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:74
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8458
@ VolatileRead
Definition: CGBuiltin.cpp:8460
@ NormalRead
Definition: CGBuiltin.cpp:8459
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:350
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:195
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2301
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:310
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:529
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:173
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9315
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7537
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6837
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:237
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:731
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7603
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:601
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:631
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:481
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2534
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1210
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2267
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:568
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:688
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:248
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1246
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8385
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2026
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:456
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7539
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7112
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:143
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
SourceRange Range
Definition: SemaObjC.cpp:757
SourceLocation Loc
Definition: SemaObjC.cpp:758
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:186
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1145
IdentifierTable & Idents
Definition: ASTContext.h:659
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:661
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
CanQualType IntTy
Definition: ASTContext.h:1127
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2114
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2672
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2391
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1118
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:778
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2293
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2296
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3540
QualType getElementType() const
Definition: Type.h:3552
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:251
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:122
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2830
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:852
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:892
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:909
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:135
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:142
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:291
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:387
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:379
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:179
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:403
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:150
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:396
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:168
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:107
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:314
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:435
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:363
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:127
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:188
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:260
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:344
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:298
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
llvm::Value * EmitCountedByFieldExpr(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
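The rotate builtins this helper lowers can be exercised as in the sketch below (illustrative names); clang maps them onto the llvm.fshl/llvm.fshr funnel-shift intrinsics.

// rotate_sketch.cpp
#include <cstdint>
uint32_t rol32(uint32_t x, uint32_t n) {
  // Lowered via emitRotate to a funnel shift (llvm.fshl.i32 with both
  // value operands equal to x).
  return __builtin_rotateleft32(x, n);
}
uint32_t ror32(uint32_t x, uint32_t n) {
  return __builtin_rotateright32(x, n);
}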
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
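At the source level this corresponds to __builtin_assume_aligned (and related attributes such as assume_aligned); the snippet below is an assumed usage sketch.

// assume_aligned_sketch.cpp
float sum4(const float *p) {
  // clang emits an llvm.assume-based alignment assumption for 'p' here,
  // which later optimizations (e.g. vectorization) may rely on.
  const float *q = static_cast<const float *>(__builtin_assume_aligned(p, 16));
  return q[0] + q[1] + q[2] + q[3];
}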
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E)
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
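The undefined behavior in question is pointer arithmetic that wraps or escapes the object; a sketch of code that exercises the check under -fsanitize=pointer-overflow (names are illustrative).

// gep_sketch.cpp - e.g. clang++ -fsanitize=pointer-overflow -c gep_sketch.cpp
const char *advance(const char *base, long offset) {
  // The pointer addition goes through the checked-GEP path, which adds a
  // runtime check that the arithmetic does not overflow the pointer.
  return base + offset;
}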
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
const FieldDecl * FindCountedByField(const FieldDecl *FD)
Find the FieldDecl specified in a FAM's "counted_by" attribute.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
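Such calls originate from the annotate attribute; a sketch (the annotation string is arbitrary) that lowers through this path to the llvm.var.annotation intrinsic.

// annotate_sketch.cpp
int annotated_counter() {
  // The annotate attribute on a local variable is lowered to an
  // llvm.var.annotation intrinsic carrying the string and source location.
  __attribute__((annotate("perf.counter"))) int counter = 0;
  return ++counter;
}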
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2878
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:99
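From the user's side the mapping looks like the sketch below; whether a given call ends up as an LLVM intrinsic, a constant fold, or a call to the library symbol depends on the builtin and the optimization settings, so treat this as illustrative only.

// libbuiltin_sketch.cpp
float magnitude(float x) {
  // "__builtin_fabsf" names the library function "fabsf"; in practice
  // clang typically lowers this particular builtin to llvm.fabs.f32.
  return __builtin_fabsf(x);
}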
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1632
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:668
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:368
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:372
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:152
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:142
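These hooks back the __builtin_return_address family; a hedged usage sketch follows (on most targets the encode/decode steps are identity operations, while targets with return-address signing need real work here).

// retaddr_sketch.cpp
void *caller_pc() {
  // __builtin_return_address(0) yields the raw return address;
  // __builtin_extract_return_addr applies the target's decode step.
  void *raw = __builtin_return_address(0);
  return __builtin_extract_return_addr(raw);
}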
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:124
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:161
Complex values, per C99 6.2.5p11.
Definition: Type.h:3108
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4189
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:1990
T * getAttr() const
Definition: DeclBase.h:579
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:599
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:249
DeclContext * getDeclContext()
Definition: DeclBase.h:454
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:435
bool hasAttr() const
Definition: DeclBase.h:583
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3075
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3070
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3066
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3567
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3050
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3941
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
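This is the evaluation behind __builtin_object_size; a small sketch of the source form (the array size is arbitrary).

// objsize_sketch.cpp
unsigned long known_size() {
  char buf[32];
  // With type 0 the whole enclosing object is measured; here the result
  // folds to the constant 32 at compile time.
  return __builtin_object_size(buf, 0);
}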
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
Represents difference between two FPOptions values.
Definition: LangOptions.h:919
Represents a member of a struct/union/class.
Definition: Decl.h:3030
Represents a function declaration or definition.
Definition: Decl.h:1932
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2669
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3620
Represents a prototype with parameter type info, e.g.
Definition: Type.h:4973
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5367
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition: Type.h:7592
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3161
QualType getPointeeType() const
Definition: Type.h:3171
A (possibly-)qualified type.
Definition: Type.h:941
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:7827
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2845
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7869
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2841
The collection of all-type qualifiers we support.
Definition: Type.h:319
Represents a struct/union/class.
Definition: Decl.h:4141
field_range fields() const
Definition: Decl.h:4347
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
bool isUnion() const
Definition: Decl.h:3763
Exposes information about the current target.
Definition: TargetInfo.h:218
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:312
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:687
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1256
bool isLittleEndian() const
Definition: TargetInfo.h:1666
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:851
bool isBigEndian() const
Definition: TargetInfo.h:1665
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1672
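A sketch of the builtin this hook gates; on targets where it is supported, __arithmetic_fence keeps fast-math reassociation from folding across the marked subexpression (see also -fprotect-parens). The function name and flags are assumptions for illustration.

// fence_sketch.cpp - e.g. clang++ -ffast-math -c fence_sketch.cpp
float sum_in_order(float a, float b, float c) {
  // Without the fence, -ffast-math could reassociate (a + b) + c.
  return __arithmetic_fence(a + b) + c;
}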
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:723
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1829
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1882
bool isBlockPointerType() const
Definition: Type.h:8006
bool isVoidType() const
Definition: Type.h:8295
bool isBooleanType() const
Definition: Type.h:8423
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:677
bool isArrayType() const
Definition: Type.h:8064
bool isCountAttributedType() const
Definition: Type.cpp:694
bool isPointerType() const
Definition: Type.h:7996
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8335
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8583
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8410
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2236
bool isBitIntType() const
Definition: Type.h:8230
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2186
bool isObjCObjectPointerType() const
Definition: Type.h:8134
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2258
bool isFloatingType() const
Definition: Type.cpp:2249
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8516
bool isRecordType() const
Definition: Type.h:8092
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2476
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2533
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1886
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:667
QualType getType() const
Definition: Decl.h:678
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:3991
unsigned getNumElements() const
Definition: Type.h:4006
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
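The layout is computed for the __builtin_os_log_format family of builtins, which Apple's os/log.h macros expand to; a hedged sketch (the format string and argument are arbitrary).

// oslog_sketch.cpp
unsigned long os_log_size_for(int value) {
  // Returns the number of bytes the serialized os_log buffer needs for
  // this format string and argument list.
  return __builtin_os_log_format_buffer_size("value=%d", value);
}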
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1150
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:275
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2204
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:407
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:676
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1663
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2013
The JSON file list parser is used to communicate input to InstallAPI.
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:151
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
unsigned long uint64_t
long int64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
uint64_t Width
Definition: ASTContext.h:157
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742