1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CodeGenFunction.h"
21#include "CodeGenModule.h"
22#include "ConstantEmitter.h"
23#include "PatternInit.h"
24#include "TargetInfo.h"
26#include "clang/AST/Attr.h"
27#include "clang/AST/Decl.h"
28#include "clang/AST/OSLog.h"
35#include "llvm/ADT/APFloat.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/FloatingPointMode.h"
38#include "llvm/ADT/SmallPtrSet.h"
39#include "llvm/ADT/StringExtras.h"
40#include "llvm/Analysis/ValueTracking.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Intrinsics.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/IntrinsicsAMDGPU.h"
46#include "llvm/IR/IntrinsicsARM.h"
47#include "llvm/IR/IntrinsicsBPF.h"
48#include "llvm/IR/IntrinsicsDirectX.h"
49#include "llvm/IR/IntrinsicsHexagon.h"
50#include "llvm/IR/IntrinsicsNVPTX.h"
51#include "llvm/IR/IntrinsicsPowerPC.h"
52#include "llvm/IR/IntrinsicsR600.h"
53#include "llvm/IR/IntrinsicsRISCV.h"
54#include "llvm/IR/IntrinsicsS390.h"
55#include "llvm/IR/IntrinsicsVE.h"
56#include "llvm/IR/IntrinsicsWebAssembly.h"
57#include "llvm/IR/IntrinsicsX86.h"
58#include "llvm/IR/MDBuilder.h"
59#include "llvm/IR/MatrixBuilder.h"
60#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
61#include "llvm/Support/AMDGPUAddrSpace.h"
62#include "llvm/Support/ConvertUTF.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Support/ScopedPrinter.h"
65#include "llvm/TargetParser/AArch64TargetParser.h"
66#include "llvm/TargetParser/RISCVISAInfo.h"
67#include "llvm/TargetParser/X86TargetParser.h"
68#include <optional>
69#include <sstream>
70
71using namespace clang;
72using namespace CodeGen;
73using namespace llvm;
74
75static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
76 Align AlignmentInBytes) {
77 ConstantInt *Byte;
78 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
79 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
80 // Nothing to initialize.
81 return;
82 case LangOptions::TrivialAutoVarInitKind::Zero:
83 Byte = CGF.Builder.getInt8(0x00);
84 break;
85 case LangOptions::TrivialAutoVarInitKind::Pattern: {
86 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
87 Byte = llvm::dyn_cast<llvm::ConstantInt>(
88 initializationPatternFor(CGF.CGM, Int8));
89 break;
90 }
91 }
92 if (CGF.CGM.stopAutoInit())
93 return;
94 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
95 I->addAnnotationMetadata("auto-init");
96}
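97// Illustrative sketch (not part of this file): with -ftrivial-auto-var-init=zero,
97// a source-level
97//   void *p = __builtin_alloca(n);
97// is expected to lower to an alloca followed by a memset that the code above
97// tags with the "auto-init" annotation, roughly:
97//   %p = alloca i8, i64 %n, align 16
97//   call void @llvm.memset.p0.i64(ptr align 16 %p, i8 0, i64 %n, i1 false)  ; "auto-init"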
97
98/// getBuiltinLibFunction - Given a builtin id for a function like
99/// "__builtin_fabsf", return a Function* for "fabsf".
100llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
101 unsigned BuiltinID) {
102 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
103
104 // Get the name, skip over the __builtin_ prefix (if necessary).
105 StringRef Name;
106 GlobalDecl D(FD);
107
108 // TODO: This list should be expanded or refactored after all GCC-compatible
109 // std libcall builtins are implemented.
110 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
111 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
112 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
113 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
114 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
115 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
116 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
117 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
118 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
119 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
120 {Builtin::BI__builtin_printf, "__printfieee128"},
121 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
122 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
123 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
124 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
125 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
126 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
127 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
128 {Builtin::BI__builtin_scanf, "__scanfieee128"},
129 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
130 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
131 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
132 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
133 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
134 };
135
136 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
137 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
138 // when 'long double' is only 64 bits wide.
139 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
140 {Builtin::BI__builtin_frexpl, "frexp"},
141 {Builtin::BI__builtin_ldexpl, "ldexp"},
142 {Builtin::BI__builtin_modfl, "modf"},
143 };
144
145 // If the builtin has been declared explicitly with an assembler label,
146 // use the mangled name. This differs from the plain label on platforms
147 // that prefix labels.
148 if (FD->hasAttr<AsmLabelAttr>())
149 Name = getMangledName(D);
150 else {
151 // TODO: This mutation should also be applied to targets other than PPC,
152 // once their backends support IEEE 128-bit style libcalls.
153 if (getTriple().isPPC64() &&
154 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
155 F128Builtins.contains(BuiltinID))
156 Name = F128Builtins[BuiltinID];
157 else if (getTriple().isOSAIX() &&
158 &getTarget().getLongDoubleFormat() ==
159 &llvm::APFloat::IEEEdouble() &&
160 AIXLongDouble64Builtins.contains(BuiltinID))
161 Name = AIXLongDouble64Builtins[BuiltinID];
162 else
163 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
164 }
165
166 llvm::FunctionType *Ty =
167 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
168
169 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
170}
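171// Usage sketch (illustrative): for __builtin_fabsf this returns the declaration
171// of "fabsf"; on a PPC64 target whose 'long double' is IEEE quad,
171// __builtin_printf is instead mapped to the "__printfieee128" libcall. E.g.
171//   llvm::Constant *C =
171//       CGM.getBuiltinLibFunction(FD, Builtin::BI__builtin_fabsf);
171// yields a float(float) declaration named "fabsf".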
171
172/// Emit the conversions required to turn the given value into an
173/// integer of the given size.
174static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
175 QualType T, llvm::IntegerType *IntType) {
176 V = CGF.EmitToMemory(V, T);
177
178 if (V->getType()->isPointerTy())
179 return CGF.Builder.CreatePtrToInt(V, IntType);
180
181 assert(V->getType() == IntType);
182 return V;
183}
184
185static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
186 QualType T, llvm::Type *ResultType) {
187 V = CGF.EmitFromMemory(V, T);
188
189 if (ResultType->isPointerTy())
190 return CGF.Builder.CreateIntToPtr(V, ResultType);
191
192 assert(V->getType() == ResultType);
193 return V;
194}
195
196static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
197 ASTContext &Ctx = CGF.getContext();
198 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
199 unsigned Bytes = Ptr.getElementType()->isPointerTy()
200 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
201 : Ptr.getElementType()->getScalarSizeInBits() / 8;
202 unsigned Align = Ptr.getAlignment().getQuantity();
203 if (Align % Bytes != 0) {
204 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
205 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
206 // Force address to be at least naturally-aligned.
207 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
208 }
209 return Ptr;
210}
211
212/// Utility to insert an atomic instruction based on Intrinsic::ID
213/// and the expression node.
214static Value *MakeBinaryAtomicValue(
215 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
216 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
217
218 QualType T = E->getType();
219 assert(E->getArg(0)->getType()->isPointerType());
220 assert(CGF.getContext().hasSameUnqualifiedType(T,
221 E->getArg(0)->getType()->getPointeeType()));
222 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
223
224 Address DestAddr = CheckAtomicAlignment(CGF, E);
225
226 llvm::IntegerType *IntType = llvm::IntegerType::get(
227 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
228
229 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
230 llvm::Type *ValueType = Val->getType();
231 Val = EmitToInt(CGF, Val, T, IntType);
232
233 llvm::Value *Result =
234 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
235 return EmitFromInt(CGF, Result, T, ValueType);
236}
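237// Lowering sketch (illustrative): a GCC __sync builtin such as
237//   __sync_fetch_and_add(&x, 5)   // int x
237// reaches MakeBinaryAtomicValue with Kind == AtomicRMWInst::Add and becomes
237//   %old = atomicrmw add ptr %x, i32 5 seq_cst
237// with the old value converted back to the source type via EmitFromInt.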
237
238static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
239 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
240 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
241
242 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
243 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
244 LV.setNontemporal(true);
245 CGF.EmitStoreOfScalar(Val, LV, false);
246 return nullptr;
247}
248
249static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
250 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
251
252 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
253 LV.setNontemporal(true);
254 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
255}
256
257static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
258 llvm::AtomicRMWInst::BinOp Kind,
259 const CallExpr *E) {
260 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
261}
262
263/// Utility to insert an atomic instruction based on Intrinsic::ID and
264/// the expression node, where the return value is the result of the
265/// operation.
266static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
267 llvm::AtomicRMWInst::BinOp Kind,
268 const CallExpr *E,
269 Instruction::BinaryOps Op,
270 bool Invert = false) {
271 QualType T = E->getType();
272 assert(E->getArg(0)->getType()->isPointerType());
273 assert(CGF.getContext().hasSameUnqualifiedType(T,
274 E->getArg(0)->getType()->getPointeeType()));
275 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
276
277 Address DestAddr = CheckAtomicAlignment(CGF, E);
278
279 llvm::IntegerType *IntType = llvm::IntegerType::get(
280 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
281
282 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
283 llvm::Type *ValueType = Val->getType();
284 Val = EmitToInt(CGF, Val, T, IntType);
285
286 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
287 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
288 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
289 if (Invert)
290 Result =
291 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
292 llvm::ConstantInt::getAllOnesValue(IntType));
293 Result = EmitFromInt(CGF, Result, T, ValueType);
294 return RValue::get(Result);
295}
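296// Lowering sketch (illustrative): the "*_and_fetch" forms use
296// EmitBinaryAtomicPost to recover the post-operation value, e.g.
296//   __sync_add_and_fetch(&x, 5)
296// becomes roughly
296//   %old = atomicrmw add ptr %x, i32 5 seq_cst
296//   %new = add i32 %old, 5
296// and __sync_nand_and_fetch additionally xors the result with -1
296// (Invert == true).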
296
297/// Utility to insert an atomic cmpxchg instruction.
298///
299/// @param CGF The current codegen function.
300/// @param E Builtin call expression to convert to cmpxchg.
301/// arg0 - address to operate on
302/// arg1 - value to compare with
303/// arg2 - new value
304/// @param ReturnBool Specifies whether to return success flag of
305/// cmpxchg result or the old value.
306///
307/// @returns result of cmpxchg, according to ReturnBool
308///
309/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
310/// invoke the function EmitAtomicCmpXchgForMSIntrin.
311static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
312 bool ReturnBool) {
313 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
314 Address DestAddr = CheckAtomicAlignment(CGF, E);
315
316 llvm::IntegerType *IntType = llvm::IntegerType::get(
317 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
318
319 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
320 llvm::Type *ValueType = Cmp->getType();
321 Cmp = EmitToInt(CGF, Cmp, T, IntType);
322 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
323
324 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
325 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
326 llvm::AtomicOrdering::SequentiallyConsistent);
327 if (ReturnBool)
328 // Extract boolean success flag and zext it to int.
329 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
330 CGF.ConvertType(E->getType()));
331 else
332 // Extract old value and emit it using the same type as compare value.
333 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
334 ValueType);
335}
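336// Lowering sketch (illustrative): both GCC compare-and-swap builtins share a
336// single cmpxchg,
336//   %pair = cmpxchg ptr %x, i32 %old, i32 %new seq_cst seq_cst
336// where __sync_bool_compare_and_swap extracts the i1 success flag (zero
336// extended to int) and __sync_val_compare_and_swap extracts the old value.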
336
337/// This function should be invoked to emit atomic cmpxchg for Microsoft's
338/// _InterlockedCompareExchange* intrinsics which have the following signature:
339/// T _InterlockedCompareExchange(T volatile *Destination,
340/// T Exchange,
341/// T Comparand);
342///
343/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
344/// cmpxchg *Destination, Comparand, Exchange.
345/// So we need to swap Comparand and Exchange when invoking
346/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
347/// function MakeAtomicCmpXchgValue since it expects the arguments to be
348/// already swapped.
349
350static
351Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
352 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
353 assert(E->getArg(0)->getType()->isPointerType());
354 assert(CGF.getContext().hasSameUnqualifiedType(
355 E->getType(), E->getArg(0)->getType()->getPointeeType()));
356 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
357 E->getArg(1)->getType()));
358 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
359 E->getArg(2)->getType()));
360
361 Address DestAddr = CheckAtomicAlignment(CGF, E);
362
363 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
364 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
365
366 // For Release ordering, the failure ordering should be Monotonic.
367 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
368 AtomicOrdering::Monotonic :
369 SuccessOrdering;
370
371 // The atomic instruction is marked volatile for consistency with MSVC. This
372 // blocks the few atomics optimizations that LLVM has. If we want to optimize
373 // _Interlocked* operations in the future, we will have to remove the volatile
374 // marker.
375 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
376 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
377 Result->setVolatile(true);
378 return CGF.Builder.CreateExtractValue(Result, 0);
379}
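380// Lowering sketch (illustrative): note the operand swap relative to the MSVC
380// signature, and the volatile marker:
380//   long prev = _InterlockedCompareExchange(&Dest, Exchange, Comparand);
380// becomes roughly
380//   %pair = cmpxchg volatile ptr %Dest, i32 %Comparand, i32 %Exchange seq_cst seq_cst
380//   %prev = extractvalue { i32, i1 } %pair, 0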
380
381// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
382// prototyped like this:
383//
384// unsigned char _InterlockedCompareExchange128...(
385// __int64 volatile * _Destination,
386// __int64 _ExchangeHigh,
387// __int64 _ExchangeLow,
388// __int64 * _ComparandResult);
389//
390// Note that Destination is assumed to be at least 16-byte aligned, despite
391// being typed as __int64.
392
393static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
394 const CallExpr *E,
395 AtomicOrdering SuccessOrdering) {
396 assert(E->getNumArgs() == 4);
397 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
398 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
399 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
400 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
401
402 assert(DestPtr->getType()->isPointerTy());
403 assert(!ExchangeHigh->getType()->isPointerTy());
404 assert(!ExchangeLow->getType()->isPointerTy());
405
406 // For Release ordering, the failure ordering should be Monotonic.
407 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
408 ? AtomicOrdering::Monotonic
409 : SuccessOrdering;
410
411 // Convert to i128 pointers and values. Alignment is also overridden for
412 // destination pointer.
413 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
414 Address DestAddr(DestPtr, Int128Ty,
415 CGF.getContext().toCharUnitsFromBits(128));
416 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
417
418 // (((i128)hi) << 64) | ((i128)lo)
419 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
420 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
421 ExchangeHigh =
422 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
423 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
424
425 // Load the comparand for the instruction.
426 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
427
428 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
429 SuccessOrdering, FailureOrdering);
430
431 // The atomic instruction is marked volatile for consistency with MSVC. This
432 // blocks the few atomics optimizations that LLVM has. If we want to optimize
433 // _Interlocked* operations in the future, we will have to remove the volatile
434 // marker.
435 CXI->setVolatile(true);
436
437 // Store the result as an outparameter.
438 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
439 ComparandAddr);
440
441 // Get the success boolean and zero extend it to i8.
442 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
443 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
444}
445
446static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
447 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
448 assert(E->getArg(0)->getType()->isPointerType());
449
450 auto *IntTy = CGF.ConvertType(E->getType());
451 Address DestAddr = CheckAtomicAlignment(CGF, E);
452 auto *Result = CGF.Builder.CreateAtomicRMW(
453 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
454 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
455}
456
457static Value *EmitAtomicDecrementValue(
458 CodeGenFunction &CGF, const CallExpr *E,
459 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
460 assert(E->getArg(0)->getType()->isPointerType());
461
462 auto *IntTy = CGF.ConvertType(E->getType());
463 Address DestAddr = CheckAtomicAlignment(CGF, E);
464 auto *Result = CGF.Builder.CreateAtomicRMW(
465 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
466 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
467}
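468// Lowering sketch (illustrative): _InterlockedIncrement/_InterlockedDecrement
468// return the *new* value, so the emitted sequence is an atomicrmw of +/-1
468// followed by the same adjustment applied to the returned old value, e.g.
468//   %old = atomicrmw add ptr %p, i32 1 seq_cst
468//   %res = add i32 %old, 1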
468
469// Build a plain volatile load.
470static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
471 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
472 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
473 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
474 llvm::Type *ITy =
475 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
476 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
477 Load->setVolatile(true);
478 return Load;
479}
480
481// Build a plain volatile store.
482static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
483 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
484 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
485 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
486 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
487 llvm::StoreInst *Store =
488 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
489 Store->setVolatile(true);
490 return Store;
491}
492
493// Emit a simple mangled intrinsic that has 1 argument and a return type
494// matching the argument type. Depending on mode, this may be a constrained
495// floating-point intrinsic.
496static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
497 const CallExpr *E, unsigned IntrinsicID,
498 unsigned ConstrainedIntrinsicID) {
499 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
500
501 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
502 if (CGF.Builder.getIsFPConstrained()) {
503 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
504 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
505 } else {
506 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
507 return CGF.Builder.CreateCall(F, Src0);
508 }
509}
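510// Selection sketch (illustrative): for a one-operand FP builtin such as
510// __builtin_ceil(x) this emits
510//   call double @llvm.ceil.f64(double %x)
510// in the default FP environment, but under strict FP semantics (e.g.
510// -ffp-model=strict) the corresponding
510// @llvm.experimental.constrained.ceil.f64 intrinsic is emitted instead.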
510
511// Emit an intrinsic that has 2 operands of the same type as its result.
512// Depending on mode, this may be a constrained floating-point intrinsic.
513static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
514 const CallExpr *E, unsigned IntrinsicID,
515 unsigned ConstrainedIntrinsicID) {
516 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
517 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
518
519 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
520 if (CGF.Builder.getIsFPConstrained()) {
521 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
522 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
523 } else {
524 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
525 return CGF.Builder.CreateCall(F, { Src0, Src1 });
526 }
527}
528
529// Has second type mangled argument.
530static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
531 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
532 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
533 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
534 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
535
536 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
537 if (CGF.Builder.getIsFPConstrained()) {
538 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
539 {Src0->getType(), Src1->getType()});
540 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
541 }
542
543 Function *F =
544 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
545 return CGF.Builder.CreateCall(F, {Src0, Src1});
546}
547
548// Emit an intrinsic that has 3 operands of the same type as its result.
549// Depending on mode, this may be a constrained floating-point intrinsic.
550static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
551 const CallExpr *E, unsigned IntrinsicID,
552 unsigned ConstrainedIntrinsicID) {
553 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
554 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
555 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
556
557 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
558 if (CGF.Builder.getIsFPConstrained()) {
559 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
560 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
561 } else {
562 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
563 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
564 }
565}
566
567// Emit an intrinsic where all operands are of the same type as the result.
568// Depending on mode, this may be a constrained floating-point intrinsic.
569static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
570 unsigned IntrinsicID,
571 unsigned ConstrainedIntrinsicID,
572 llvm::Type *Ty,
573 ArrayRef<Value *> Args) {
574 Function *F;
575 if (CGF.Builder.getIsFPConstrained())
576 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
577 else
578 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
579
580 if (CGF.Builder.getIsFPConstrained())
581 return CGF.Builder.CreateConstrainedFPCall(F, Args);
582 else
583 return CGF.Builder.CreateCall(F, Args);
584}
585
586// Emit a simple intrinsic that has N scalar arguments and a return type
587// matching the argument type. It is assumed that only the first argument is
588// overloaded.
589template <unsigned N>
590static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E,
591 unsigned IntrinsicID,
592 llvm::StringRef Name = "") {
593 static_assert(N, "expect non-empty argument");
594 SmallVector<Value *, N> Args;
595 for (unsigned I = 0; I < N; ++I)
596 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
597 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
598 return CGF.Builder.CreateCall(F, Args, Name);
599}
600
601// Emit an intrinsic that has 1 float or double operand, and 1 integer.
602static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
603 const CallExpr *E,
604 unsigned IntrinsicID) {
605 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
606 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
607
608 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
609 return CGF.Builder.CreateCall(F, {Src0, Src1});
610}
611
612// Emit an intrinsic that has overloaded integer result and fp operand.
613static Value *
614emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
615 unsigned IntrinsicID,
616 unsigned ConstrainedIntrinsicID) {
617 llvm::Type *ResultType = CGF.ConvertType(E->getType());
618 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
619
620 if (CGF.Builder.getIsFPConstrained()) {
621 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
622 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
623 {ResultType, Src0->getType()});
624 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
625 } else {
626 Function *F =
627 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
628 return CGF.Builder.CreateCall(F, Src0);
629 }
630}
631
632static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
633 llvm::Intrinsic::ID IntrinsicID) {
634 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
635 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
636
637 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
638 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
639 llvm::Function *F =
640 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
641 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
642
643 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
644 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
645 CGF.EmitStoreOfScalar(Exp, LV);
646
647 return CGF.Builder.CreateExtractValue(Call, 0);
648}
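649// Lowering sketch (illustrative): __builtin_frexp(x, &e) maps onto the
649// two-result llvm.frexp intrinsic; the second result is stored through the
649// pointer argument and the first is returned:
649//   %r    = call { double, i32 } @llvm.frexp.f64.i32(double %x)
649//   %exp  = extractvalue { double, i32 } %r, 1
649//   store i32 %exp, ptr %e
649//   %mant = extractvalue { double, i32 } %r, 0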
649
650/// EmitFAbs - Emit a call to @llvm.fabs().
651static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
652 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
653 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
654 Call->setDoesNotAccessMemory();
655 return Call;
656}
657
658/// Emit the computation of the sign bit for a floating point value. Returns
659/// the i1 sign bit value.
660static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
661 LLVMContext &C = CGF.CGM.getLLVMContext();
662
663 llvm::Type *Ty = V->getType();
664 int Width = Ty->getPrimitiveSizeInBits();
665 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
666 V = CGF.Builder.CreateBitCast(V, IntTy);
667 if (Ty->isPPC_FP128Ty()) {
668 // We want the sign bit of the higher-order double. The bitcast we just
669 // did works as if the double-double was stored to memory and then
670 // read as an i128. The "store" will put the higher-order double in the
671 // lower address in both little- and big-Endian modes, but the "load"
672 // will treat those bits as a different part of the i128: the low bits in
673 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
674 // we need to shift the high bits down to the low before truncating.
675 Width >>= 1;
676 if (CGF.getTarget().isBigEndian()) {
677 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
678 V = CGF.Builder.CreateLShr(V, ShiftCst);
679 }
680 // We are truncating value in order to extract the higher-order
681 // double, which we will be using to extract the sign from.
682 IntTy = llvm::IntegerType::get(C, Width);
683 V = CGF.Builder.CreateTrunc(V, IntTy);
684 }
685 Value *Zero = llvm::Constant::getNullValue(IntTy);
686 return CGF.Builder.CreateICmpSLT(V, Zero);
687}
688
689static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
690 const CallExpr *E, llvm::Constant *calleeValue) {
691 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
692 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
693 RValue Call =
694 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
695
696 if (unsigned BuiltinID = FD->getBuiltinID()) {
697 // Check whether a FP math builtin function, such as BI__builtin_expf
698 ASTContext &Context = CGF.getContext();
699 bool ConstWithoutErrnoAndExceptions =
700 Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
701 // Restrict to targets that set errno; for example, macOS doesn't set errno.
702 // TODO: Support builtin function with complex type returned, eg: cacosh
703 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
704 !CGF.Builder.getIsFPConstrained() && Call.isScalar()) {
705 // Emit "int" TBAA metadata on FP math libcalls.
706 clang::QualType IntTy = Context.IntTy;
707 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
708 Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal());
709 CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
710 }
711 }
712 return Call;
713}
714
715/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
716/// depending on IntrinsicID.
717///
718/// \arg CGF The current codegen function.
719/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
720/// \arg X The first argument to the llvm.*.with.overflow.*.
721/// \arg Y The second argument to the llvm.*.with.overflow.*.
722/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
723/// \returns The result (i.e. sum/product) returned by the intrinsic.
724static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
725 const llvm::Intrinsic::ID IntrinsicID,
726 llvm::Value *X, llvm::Value *Y,
727 llvm::Value *&Carry) {
728 // Make sure we have integers of the same width.
729 assert(X->getType() == Y->getType() &&
730 "Arguments must be the same type. (Did you forget to make sure both "
731 "arguments have the same integer width?)");
732
733 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
734 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
735 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
736 return CGF.Builder.CreateExtractValue(Tmp, 0);
737}
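738// Usage sketch (illustrative): callers such as the __builtin_add_overflow
738// family rely on the pattern
738//   %pair  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
738//   %sum   = extractvalue { i32, i1 } %pair, 0   ; returned value
738//   %carry = extractvalue { i32, i1 } %pair, 1   ; written to Carry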
738
739static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
740 int low, int high) {
741 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
742 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
743 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
744 Call->addRangeRetAttr(CR);
745 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
746 return Call;
747}
748
749namespace {
750 struct WidthAndSignedness {
751 unsigned Width;
752 bool Signed;
753 };
754}
755
756static WidthAndSignedness
757getIntegerWidthAndSignedness(const clang::ASTContext &context,
758 const clang::QualType Type) {
759 assert(Type->isIntegerType() && "Given type is not an integer.");
760 unsigned Width = context.getIntWidth(Type);
761 bool Signed = Type->isSignedIntegerType();
762 return {Width, Signed};
763}
764
765// Given one or more integer types, this function produces an integer type that
766// encompasses them: any value in one of the given types could be expressed in
767// the encompassing type.
768static struct WidthAndSignedness
769EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
770 assert(Types.size() > 0 && "Empty list of types.");
771
772 // If any of the given types is signed, we must return a signed type.
773 bool Signed = false;
774 for (const auto &Type : Types) {
775 Signed |= Type.Signed;
776 }
777
778 // The encompassing type must have a width greater than or equal to the width
779 // of the specified types. Additionally, if the encompassing type is signed,
780 // its width must be strictly greater than the width of any unsigned types
781 // given.
782 unsigned Width = 0;
783 for (const auto &Type : Types) {
784 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
785 if (Width < MinWidth) {
786 Width = MinWidth;
787 }
788 }
789
790 return {Width, Signed};
791}
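792// Worked example (illustrative): for operands of type 'unsigned int'
792// (Width 32, unsigned) and 'int' (Width 32, signed), EncompassingIntegerType
792// returns {Width = 33, Signed = true}: once any input is signed, the result
792// must be one bit wider than the widest unsigned input to represent all of
792// its values.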
792
793Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
794 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
795 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
796 ArgValue);
797}
798
799/// Checks if using the result of __builtin_object_size(p, @p From) in place of
800/// __builtin_object_size(p, @p To) is correct
801static bool areBOSTypesCompatible(int From, int To) {
802 // Note: Our __builtin_object_size implementation currently treats Type=0 and
803 // Type=2 identically. Encoding this implementation detail here may make
804 // improving __builtin_object_size difficult in the future, so it's omitted.
805 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
806}
807
808static llvm::Value *
809getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
810 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
811}
812
813llvm::Value *
814CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
815 llvm::IntegerType *ResType,
816 llvm::Value *EmittedE,
817 bool IsDynamic) {
818 uint64_t ObjectSize;
819 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
820 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
821 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
822}
823
824const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
825 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
826 uint64_t &Offset) {
827 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
828 getLangOpts().getStrictFlexArraysLevel();
829 uint32_t FieldNo = 0;
830
831 if (RD->isImplicit())
832 return nullptr;
833
834 for (const FieldDecl *FD : RD->fields()) {
835 if ((!FAMDecl || FD == FAMDecl) &&
836 Decl::isFlexibleArrayMemberLike(
837 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
838 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
839 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
840 Offset += Layout.getFieldOffset(FieldNo);
841 return FD;
842 }
843
844 QualType Ty = FD->getType();
845 if (Ty->isRecordType()) {
846 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
847 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
848 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
849 Offset += Layout.getFieldOffset(FieldNo);
850 return Field;
851 }
852 }
853
854 if (!RD->isUnion())
855 ++FieldNo;
856 }
857
858 return nullptr;
859}
860
861static unsigned CountCountedByAttrs(const RecordDecl *RD) {
862 unsigned Num = 0;
863
864 for (const FieldDecl *FD : RD->fields()) {
865 if (FD->getType()->isCountAttributedType())
866 return ++Num;
867
868 QualType Ty = FD->getType();
869 if (Ty->isRecordType())
870 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
871 }
872
873 return Num;
874}
875
876llvm::Value *
877CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
878 llvm::IntegerType *ResType) {
879 // The code generated here calculates the size of a struct with a flexible
880 // array member that uses the counted_by attribute. There are two instances
881 // we handle:
882 //
883 // struct s {
884 // unsigned long flags;
885 // int count;
886 // int array[] __attribute__((counted_by(count)));
887 // }
888 //
889 // 1) bdos of the flexible array itself:
890 //
891 // __builtin_dynamic_object_size(p->array, 1) ==
892 // p->count * sizeof(*p->array)
893 //
894 // 2) bdos of a pointer into the flexible array:
895 //
896 // __builtin_dynamic_object_size(&p->array[42], 1) ==
897 // (p->count - 42) * sizeof(*p->array)
898 //
899 // 3) bdos of the whole struct, including the flexible array:
900 //
901 // __builtin_dynamic_object_size(p, 1) ==
902 // max(sizeof(struct s),
903 // offsetof(struct s, array) + p->count * sizeof(*p->array))
904 //
905 ASTContext &Ctx = getContext();
906 const Expr *Base = E->IgnoreParenImpCasts();
907 const Expr *Idx = nullptr;
908
909 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
910 UO && UO->getOpcode() == UO_AddrOf) {
911 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
912 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
913 Base = ASE->getBase()->IgnoreParenImpCasts();
914 Idx = ASE->getIdx()->IgnoreParenImpCasts();
915
916 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
917 int64_t Val = IL->getValue().getSExtValue();
918 if (Val < 0)
919 return getDefaultBuiltinObjectSizeResult(Type, ResType);
920
921 if (Val == 0)
922 // The index is 0, so we don't need to take it into account.
923 Idx = nullptr;
924 }
925 } else {
926 // Potential pointer to another element in the struct.
927 Base = SubExpr;
928 }
929 }
930
931 // Get the flexible array member Decl.
932 const RecordDecl *OuterRD = nullptr;
933 const FieldDecl *FAMDecl = nullptr;
934 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
935 // Check if \p Base is referencing the FAM itself.
936 const ValueDecl *VD = ME->getMemberDecl();
937 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
938 FAMDecl = dyn_cast<FieldDecl>(VD);
939 if (!FAMDecl)
940 return nullptr;
941 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
942 // Check if we're pointing to the whole struct.
943 QualType Ty = DRE->getDecl()->getType();
944 if (Ty->isPointerType())
945 Ty = Ty->getPointeeType();
946 OuterRD = Ty->getAsRecordDecl();
947
948 // If we have a situation like this:
949 //
950 // struct union_of_fams {
951 // int flags;
952 // union {
953 // signed char normal_field;
954 // struct {
955 // int count1;
956 // int arr1[] __counted_by(count1);
957 // };
958 // struct {
959 // signed char count2;
960 // int arr2[] __counted_by(count2);
961 // };
962 // };
963 // };
964 //
965 // We don't know which 'count' to use in this scenario:
966 //
967 // size_t get_size(struct union_of_fams *p) {
968 // return __builtin_dynamic_object_size(p, 1);
969 // }
970 //
971 // Instead of calculating a wrong number, we give up.
972 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
973 return nullptr;
974 }
975
976 if (!OuterRD)
977 return nullptr;
978
979 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
980 // get its offset.
981 uint64_t Offset = 0;
982 FAMDecl =
983 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
984 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
985
986 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
987 // No flexible array member found or it doesn't have the "counted_by"
988 // attribute.
989 return nullptr;
990
991 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
992 if (!CountedByFD)
993 // Can't find the field referenced by the "counted_by" attribute.
994 return nullptr;
995
996 // Build a load of the counted_by field.
997 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
998 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
999 if (!CountedByInst)
1000 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1001
1002 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1003
1004 // Build a load of the index and subtract it from the count.
1005 Value *IdxInst = nullptr;
1006 if (Idx) {
1007 if (Idx->HasSideEffects(getContext()))
1008 // We can't have side-effects.
1009 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1010
1011 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1012 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1013 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1014
1015 // We go ahead with the calculation here. If the index turns out to be
1016 // negative, we'll catch it at the end.
1017 CountedByInst =
1018 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1019 }
1020
1021 // Calculate how large the flexible array member is in bytes.
1022 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1023 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1024 llvm::Constant *ElemSize =
1025 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1026 Value *FAMSize =
1027 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1028 FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1029 Value *Res = FAMSize;
1030
1031 if (isa<DeclRefExpr>(Base)) {
1032 // The whole struct is specified in the __bdos.
1033 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1034
1035 // Get the offset of the FAM.
1036 llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1037 Value *OffsetAndFAMSize =
1038 Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1039
1040 // Get the full size of the struct.
1041 llvm::Constant *SizeofStruct =
1042 ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1043
1044 // max(sizeof(struct s),
1045 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1046 Res = IsSigned
1047 ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1048 OffsetAndFAMSize, SizeofStruct)
1049 : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1050 OffsetAndFAMSize, SizeofStruct);
1051 }
1052
1053 // A negative \p IdxInst or \p CountedByInst means that the index lands
1054 // outside of the flexible array member. If that's the case, we want to
1055 // return 0.
1056 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1057 if (IdxInst)
1058 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1059
1060 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1061}
1062
1063/// Returns a Value corresponding to the size of the given expression.
1064/// This Value may be either of the following:
1065/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1066/// it)
1067/// - A call to the @llvm.objectsize intrinsic
1068///
1069/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1070/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1071/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1072llvm::Value *
1073CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1074 llvm::IntegerType *ResType,
1075 llvm::Value *EmittedE, bool IsDynamic) {
1076 // We need to reference an argument if the pointer is a parameter with the
1077 // pass_object_size attribute.
1078 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1079 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1080 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1081 if (Param != nullptr && PS != nullptr &&
1082 areBOSTypesCompatible(PS->getType(), Type)) {
1083 auto Iter = SizeArguments.find(Param);
1084 assert(Iter != SizeArguments.end());
1085
1086 const ImplicitParamDecl *D = Iter->second;
1087 auto DIter = LocalDeclMap.find(D);
1088 assert(DIter != LocalDeclMap.end());
1089
1090 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1091 getContext().getSizeType(), E->getBeginLoc());
1092 }
1093 }
1094
1095 if (IsDynamic) {
1096 // Emit special code for a flexible array member with the "counted_by"
1097 // attribute.
1098 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1099 return V;
1100 }
1101
1102 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1103 // evaluate E for side-effects. In either case, we shouldn't lower to
1104 // @llvm.objectsize.
1105 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1106 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1107
1108 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1109 assert(Ptr->getType()->isPointerTy() &&
1110 "Non-pointer passed to __builtin_object_size?");
1111
1112 Function *F =
1113 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1114
1115 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1116 Value *Min = Builder.getInt1((Type & 2) != 0);
1117 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1118 Value *NullIsUnknown = Builder.getTrue();
1119 Value *Dynamic = Builder.getInt1(IsDynamic);
1120 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1121}
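1122// Lowering sketch (illustrative): when the size cannot be folded,
1122//   __builtin_object_size(p, 0)
1122// becomes
1122//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 false)
1122// where the i1 arguments are (min, nullunknown, dynamic); type 2 sets
1122// min=true and __builtin_dynamic_object_size sets dynamic=true.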
1122
1123namespace {
1124/// A struct to generically describe a bit test intrinsic.
1125struct BitTest {
1126 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1127 enum InterlockingKind : uint8_t {
1128 Unlocked,
1129 Sequential,
1130 Acquire,
1131 Release,
1132 NoFence
1133 };
1134
1135 ActionKind Action;
1136 InterlockingKind Interlocking;
1137 bool Is64Bit;
1138
1139 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1140};
1141
1142} // namespace
1143
1144BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1145 switch (BuiltinID) {
1146 // Main portable variants.
1147 case Builtin::BI_bittest:
1148 return {TestOnly, Unlocked, false};
1149 case Builtin::BI_bittestandcomplement:
1150 return {Complement, Unlocked, false};
1151 case Builtin::BI_bittestandreset:
1152 return {Reset, Unlocked, false};
1153 case Builtin::BI_bittestandset:
1154 return {Set, Unlocked, false};
1155 case Builtin::BI_interlockedbittestandreset:
1156 return {Reset, Sequential, false};
1157 case Builtin::BI_interlockedbittestandset:
1158 return {Set, Sequential, false};
1159
1160 // X86-specific 64-bit variants.
1161 case Builtin::BI_bittest64:
1162 return {TestOnly, Unlocked, true};
1163 case Builtin::BI_bittestandcomplement64:
1164 return {Complement, Unlocked, true};
1165 case Builtin::BI_bittestandreset64:
1166 return {Reset, Unlocked, true};
1167 case Builtin::BI_bittestandset64:
1168 return {Set, Unlocked, true};
1169 case Builtin::BI_interlockedbittestandreset64:
1170 return {Reset, Sequential, true};
1171 case Builtin::BI_interlockedbittestandset64:
1172 return {Set, Sequential, true};
1173
1174 // ARM/AArch64-specific ordering variants.
1175 case Builtin::BI_interlockedbittestandset_acq:
1176 return {Set, Acquire, false};
1177 case Builtin::BI_interlockedbittestandset_rel:
1178 return {Set, Release, false};
1179 case Builtin::BI_interlockedbittestandset_nf:
1180 return {Set, NoFence, false};
1181 case Builtin::BI_interlockedbittestandreset_acq:
1182 return {Reset, Acquire, false};
1183 case Builtin::BI_interlockedbittestandreset_rel:
1184 return {Reset, Release, false};
1185 case Builtin::BI_interlockedbittestandreset_nf:
1186 return {Reset, NoFence, false};
1187 }
1188 llvm_unreachable("expected only bittest intrinsics");
1189}
1190
1191static char bitActionToX86BTCode(BitTest::ActionKind A) {
1192 switch (A) {
1193 case BitTest::TestOnly: return '\0';
1194 case BitTest::Complement: return 'c';
1195 case BitTest::Reset: return 'r';
1196 case BitTest::Set: return 's';
1197 }
1198 llvm_unreachable("invalid action");
1199}
1200
1201static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1202 BitTest BT,
1203 const CallExpr *E, Value *BitBase,
1204 Value *BitPos) {
1205 char Action = bitActionToX86BTCode(BT.Action);
1206 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1207
1208 // Build the assembly.
1209 SmallString<64> Asm;
1210 raw_svector_ostream AsmOS(Asm);
1211 if (BT.Interlocking != BitTest::Unlocked)
1212 AsmOS << "lock ";
1213 AsmOS << "bt";
1214 if (Action)
1215 AsmOS << Action;
1216 AsmOS << SizeSuffix << " $2, ($1)";
1217
1218 // Build the constraints. FIXME: We should support immediates when possible.
1219 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1220 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1221 if (!MachineClobbers.empty()) {
1222 Constraints += ',';
1223 Constraints += MachineClobbers;
1224 }
1225 llvm::IntegerType *IntType = llvm::IntegerType::get(
1226 CGF.getLLVMContext(),
1227 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1228 llvm::FunctionType *FTy =
1229 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1230
1231 llvm::InlineAsm *IA =
1232 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1233 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1234}
1235
1236static llvm::AtomicOrdering
1237getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1238 switch (I) {
1239 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1240 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1241 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1242 case BitTest::Release: return llvm::AtomicOrdering::Release;
1243 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1244 }
1245 llvm_unreachable("invalid interlocking");
1246}
1247
1248/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1249/// bits and a bit position and read and optionally modify the bit at that
1250/// position. The position index can be arbitrarily large, i.e. it can be larger
1251/// than 31 or 63, so we need an indexed load in the general case.
1252static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1253 unsigned BuiltinID,
1254 const CallExpr *E) {
1255 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1256 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1257
1258 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1259
1260 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1261 // indexing operation internally. Use them if possible.
1262 if (CGF.getTarget().getTriple().isX86())
1263 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1264
1265 // Otherwise, use generic code to load one byte and test the bit. Use all but
1266 // the bottom three bits as the array index, and the bottom three bits to form
1267 // a mask.
1268 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1269 Value *ByteIndex = CGF.Builder.CreateAShr(
1270 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1271 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1272 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1273 ByteIndex, "bittest.byteaddr"),
1274 CGF.Int8Ty, CharUnits::One());
1275 Value *PosLow =
1276 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1277 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1278
1279 // The updating instructions will need a mask.
1280 Value *Mask = nullptr;
1281 if (BT.Action != BitTest::TestOnly) {
1282 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1283 "bittest.mask");
1284 }
1285
1286 // Check the action and ordering of the interlocked intrinsics.
1287 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1288
1289 Value *OldByte = nullptr;
1290 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1291 // Emit a combined atomicrmw load/store operation for the interlocked
1292 // intrinsics.
1293 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1294 if (BT.Action == BitTest::Reset) {
1295 Mask = CGF.Builder.CreateNot(Mask);
1296 RMWOp = llvm::AtomicRMWInst::And;
1297 }
1298 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1299 } else {
1300 // Emit a plain load for the non-interlocked intrinsics.
1301 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1302 Value *NewByte = nullptr;
1303 switch (BT.Action) {
1304 case BitTest::TestOnly:
1305 // Don't store anything.
1306 break;
1307 case BitTest::Complement:
1308 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1309 break;
1310 case BitTest::Reset:
1311 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1312 break;
1313 case BitTest::Set:
1314 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1315 break;
1316 }
1317 if (NewByte)
1318 CGF.Builder.CreateStore(NewByte, ByteAddr);
1319 }
1320
1321 // However we loaded the old byte, either by plain load or atomicrmw, shift
1322 // the bit into the low position and mask it to 0 or 1.
1323 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1324 return CGF.Builder.CreateAnd(
1325 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1326}
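1327// Lowering sketch (illustrative) for non-x86 targets: _bittestandset(base, pos)
1327// becomes a byte-granular read-modify-write,
1327//   ByteAddr = Base + (Pos >> 3);
1327//   Mask     = 1 << (Pos & 7);
1327//   Old      = *ByteAddr;  *ByteAddr = Old | Mask;
1327//   return (Old >> (Pos & 7)) & 1;
1327// while the _interlocked* variants perform the update with an atomicrmw
1327// or/and at the ordering chosen by getBitTestAtomicOrdering.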
1327
1328static Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1329 unsigned BuiltinID,
1330 const CallExpr *E) {
1331 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1332
1332
1333 SmallString<64> Asm;
1334 raw_svector_ostream AsmOS(Asm);
1335 llvm::IntegerType *RetType = CGF.Int32Ty;
1336
1337 switch (BuiltinID) {
1338 case clang::PPC::BI__builtin_ppc_ldarx:
1339 AsmOS << "ldarx ";
1340 RetType = CGF.Int64Ty;
1341 break;
1342 case clang::PPC::BI__builtin_ppc_lwarx:
1343 AsmOS << "lwarx ";
1344 RetType = CGF.Int32Ty;
1345 break;
1346 case clang::PPC::BI__builtin_ppc_lharx:
1347 AsmOS << "lharx ";
1348 RetType = CGF.Int16Ty;
1349 break;
1350 case clang::PPC::BI__builtin_ppc_lbarx:
1351 AsmOS << "lbarx ";
1352 RetType = CGF.Int8Ty;
1353 break;
1354 default:
1355 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1356 }
1357
1358 AsmOS << "$0, ${1:y}";
1359
1360 std::string Constraints = "=r,*Z,~{memory}";
1361 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1362 if (!MachineClobbers.empty()) {
1363 Constraints += ',';
1364 Constraints += MachineClobbers;
1365 }
1366
1367 llvm::Type *PtrType = CGF.UnqualPtrTy;
1368 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1369
1370 llvm::InlineAsm *IA =
1371 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1372 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1373 CI->addParamAttr(
1374 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1375 return CI;
1376}
1377
1378namespace {
1379enum class MSVCSetJmpKind {
1380 _setjmpex,
1381 _setjmp3,
1382 _setjmp
1383};
1384}
1385
1386/// MSVC handles setjmp a bit differently on different platforms. On every
1387/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1388/// parameters can be passed as variadic arguments, but we always pass none.
1389static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1390 const CallExpr *E) {
1391 llvm::Value *Arg1 = nullptr;
1392 llvm::Type *Arg1Ty = nullptr;
1393 StringRef Name;
1394 bool IsVarArg = false;
1395 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1396 Name = "_setjmp3";
1397 Arg1Ty = CGF.Int32Ty;
1398 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1399 IsVarArg = true;
1400 } else {
1401 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1402 Arg1Ty = CGF.Int8PtrTy;
1403 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1404 Arg1 = CGF.Builder.CreateCall(
1405 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1406 } else
1407 Arg1 = CGF.Builder.CreateCall(
1408 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1409 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1410 }
1411
1412 // Mark the call site and declaration with ReturnsTwice.
1413 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1414 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1415 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1416 llvm::Attribute::ReturnsTwice);
1417 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1418 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1419 ReturnsTwiceAttr, /*Local=*/true);
1420
1421 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1422 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1423 llvm::Value *Args[] = {Buf, Arg1};
1424 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1425 CB->setAttributes(ReturnsTwiceAttr);
1426 return RValue::get(CB);
1427}
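1428// Lowering sketch (illustrative): for the non-x86-32 flavors the jump buffer
1428// and a frame pointer are passed, e.g. roughly
1428//   %fa = call ptr @llvm.frameaddress.p0(i32 0)
1428//   %r  = call i32 @_setjmp(ptr %buf, ptr %fa)     ; marked returns_twice
1428// (AArch64 uses @llvm.sponentry instead of the frame address), while 32-bit
1428// x86 calls the variadic "_setjmp3" with a single extra 0 argument. Which
1428// flavor is chosen is decided by the callers, not shown in this section.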
1428
1429// Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
1430// repeating code, we handle them here.
1470 __fastfail,
1471};
1472
1473static std::optional<CodeGenFunction::MSVCIntrin>
1474translateArmToMsvcIntrin(unsigned BuiltinID) {
1475 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1476 switch (BuiltinID) {
1477 default:
1478 return std::nullopt;
1479 case clang::ARM::BI_BitScanForward:
1480 case clang::ARM::BI_BitScanForward64:
1481 return MSVCIntrin::_BitScanForward;
1482 case clang::ARM::BI_BitScanReverse:
1483 case clang::ARM::BI_BitScanReverse64:
1484 return MSVCIntrin::_BitScanReverse;
1485 case clang::ARM::BI_InterlockedAnd64:
1486 return MSVCIntrin::_InterlockedAnd;
1487 case clang::ARM::BI_InterlockedExchange64:
1488 return MSVCIntrin::_InterlockedExchange;
1489 case clang::ARM::BI_InterlockedExchangeAdd64:
1490 return MSVCIntrin::_InterlockedExchangeAdd;
1491 case clang::ARM::BI_InterlockedExchangeSub64:
1492 return MSVCIntrin::_InterlockedExchangeSub;
1493 case clang::ARM::BI_InterlockedOr64:
1494 return MSVCIntrin::_InterlockedOr;
1495 case clang::ARM::BI_InterlockedXor64:
1496 return MSVCIntrin::_InterlockedXor;
1497 case clang::ARM::BI_InterlockedDecrement64:
1498 return MSVCIntrin::_InterlockedDecrement;
1499 case clang::ARM::BI_InterlockedIncrement64:
1500 return MSVCIntrin::_InterlockedIncrement;
1501 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1502 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1503 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1504 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1505 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1506 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1507 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1508 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1509 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1510 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1511 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1512 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1513 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1514 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1515 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1516 case clang::ARM::BI_InterlockedExchange8_acq:
1517 case clang::ARM::BI_InterlockedExchange16_acq:
1518 case clang::ARM::BI_InterlockedExchange_acq:
1519 case clang::ARM::BI_InterlockedExchange64_acq:
1520 return MSVCIntrin::_InterlockedExchange_acq;
1521 case clang::ARM::BI_InterlockedExchange8_rel:
1522 case clang::ARM::BI_InterlockedExchange16_rel:
1523 case clang::ARM::BI_InterlockedExchange_rel:
1524 case clang::ARM::BI_InterlockedExchange64_rel:
1525 return MSVCIntrin::_InterlockedExchange_rel;
1526 case clang::ARM::BI_InterlockedExchange8_nf:
1527 case clang::ARM::BI_InterlockedExchange16_nf:
1528 case clang::ARM::BI_InterlockedExchange_nf:
1529 case clang::ARM::BI_InterlockedExchange64_nf:
1530 return MSVCIntrin::_InterlockedExchange_nf;
1531 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1532 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1533 case clang::ARM::BI_InterlockedCompareExchange_acq:
1534 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1535 return MSVCIntrin::_InterlockedCompareExchange_acq;
1536 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1537 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1538 case clang::ARM::BI_InterlockedCompareExchange_rel:
1539 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1540 return MSVCIntrin::_InterlockedCompareExchange_rel;
1541 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1542 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1543 case clang::ARM::BI_InterlockedCompareExchange_nf:
1544 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1545 return MSVCIntrin::_InterlockedCompareExchange_nf;
1546 case clang::ARM::BI_InterlockedOr8_acq:
1547 case clang::ARM::BI_InterlockedOr16_acq:
1548 case clang::ARM::BI_InterlockedOr_acq:
1549 case clang::ARM::BI_InterlockedOr64_acq:
1550 return MSVCIntrin::_InterlockedOr_acq;
1551 case clang::ARM::BI_InterlockedOr8_rel:
1552 case clang::ARM::BI_InterlockedOr16_rel:
1553 case clang::ARM::BI_InterlockedOr_rel:
1554 case clang::ARM::BI_InterlockedOr64_rel:
1555 return MSVCIntrin::_InterlockedOr_rel;
1556 case clang::ARM::BI_InterlockedOr8_nf:
1557 case clang::ARM::BI_InterlockedOr16_nf:
1558 case clang::ARM::BI_InterlockedOr_nf:
1559 case clang::ARM::BI_InterlockedOr64_nf:
1560 return MSVCIntrin::_InterlockedOr_nf;
1561 case clang::ARM::BI_InterlockedXor8_acq:
1562 case clang::ARM::BI_InterlockedXor16_acq:
1563 case clang::ARM::BI_InterlockedXor_acq:
1564 case clang::ARM::BI_InterlockedXor64_acq:
1565 return MSVCIntrin::_InterlockedXor_acq;
1566 case clang::ARM::BI_InterlockedXor8_rel:
1567 case clang::ARM::BI_InterlockedXor16_rel:
1568 case clang::ARM::BI_InterlockedXor_rel:
1569 case clang::ARM::BI_InterlockedXor64_rel:
1570 return MSVCIntrin::_InterlockedXor_rel;
1571 case clang::ARM::BI_InterlockedXor8_nf:
1572 case clang::ARM::BI_InterlockedXor16_nf:
1573 case clang::ARM::BI_InterlockedXor_nf:
1574 case clang::ARM::BI_InterlockedXor64_nf:
1575 return MSVCIntrin::_InterlockedXor_nf;
1576 case clang::ARM::BI_InterlockedAnd8_acq:
1577 case clang::ARM::BI_InterlockedAnd16_acq:
1578 case clang::ARM::BI_InterlockedAnd_acq:
1579 case clang::ARM::BI_InterlockedAnd64_acq:
1580 return MSVCIntrin::_InterlockedAnd_acq;
1581 case clang::ARM::BI_InterlockedAnd8_rel:
1582 case clang::ARM::BI_InterlockedAnd16_rel:
1583 case clang::ARM::BI_InterlockedAnd_rel:
1584 case clang::ARM::BI_InterlockedAnd64_rel:
1585 return MSVCIntrin::_InterlockedAnd_rel;
1586 case clang::ARM::BI_InterlockedAnd8_nf:
1587 case clang::ARM::BI_InterlockedAnd16_nf:
1588 case clang::ARM::BI_InterlockedAnd_nf:
1589 case clang::ARM::BI_InterlockedAnd64_nf:
1590 return MSVCIntrin::_InterlockedAnd_nf;
1591 case clang::ARM::BI_InterlockedIncrement16_acq:
1592 case clang::ARM::BI_InterlockedIncrement_acq:
1593 case clang::ARM::BI_InterlockedIncrement64_acq:
1594 return MSVCIntrin::_InterlockedIncrement_acq;
1595 case clang::ARM::BI_InterlockedIncrement16_rel:
1596 case clang::ARM::BI_InterlockedIncrement_rel:
1597 case clang::ARM::BI_InterlockedIncrement64_rel:
1598 return MSVCIntrin::_InterlockedIncrement_rel;
1599 case clang::ARM::BI_InterlockedIncrement16_nf:
1600 case clang::ARM::BI_InterlockedIncrement_nf:
1601 case clang::ARM::BI_InterlockedIncrement64_nf:
1602 return MSVCIntrin::_InterlockedIncrement_nf;
1603 case clang::ARM::BI_InterlockedDecrement16_acq:
1604 case clang::ARM::BI_InterlockedDecrement_acq:
1605 case clang::ARM::BI_InterlockedDecrement64_acq:
1606 return MSVCIntrin::_InterlockedDecrement_acq;
1607 case clang::ARM::BI_InterlockedDecrement16_rel:
1608 case clang::ARM::BI_InterlockedDecrement_rel:
1609 case clang::ARM::BI_InterlockedDecrement64_rel:
1610 return MSVCIntrin::_InterlockedDecrement_rel;
1611 case clang::ARM::BI_InterlockedDecrement16_nf:
1612 case clang::ARM::BI_InterlockedDecrement_nf:
1613 case clang::ARM::BI_InterlockedDecrement64_nf:
1614 return MSVCIntrin::_InterlockedDecrement_nf;
1615 }
1616 llvm_unreachable("must return from switch");
1617}
1618
1619static std::optional<CodeGenFunction::MSVCIntrin>
1620translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1621 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1622 switch (BuiltinID) {
1623 default:
1624 return std::nullopt;
1625 case clang::AArch64::BI_BitScanForward:
1626 case clang::AArch64::BI_BitScanForward64:
1627 return MSVCIntrin::_BitScanForward;
1628 case clang::AArch64::BI_BitScanReverse:
1629 case clang::AArch64::BI_BitScanReverse64:
1630 return MSVCIntrin::_BitScanReverse;
1631 case clang::AArch64::BI_InterlockedAnd64:
1632 return MSVCIntrin::_InterlockedAnd;
1633 case clang::AArch64::BI_InterlockedExchange64:
1634 return MSVCIntrin::_InterlockedExchange;
1635 case clang::AArch64::BI_InterlockedExchangeAdd64:
1636 return MSVCIntrin::_InterlockedExchangeAdd;
1637 case clang::AArch64::BI_InterlockedExchangeSub64:
1638 return MSVCIntrin::_InterlockedExchangeSub;
1639 case clang::AArch64::BI_InterlockedOr64:
1640 return MSVCIntrin::_InterlockedOr;
1641 case clang::AArch64::BI_InterlockedXor64:
1642 return MSVCIntrin::_InterlockedXor;
1643 case clang::AArch64::BI_InterlockedDecrement64:
1644 return MSVCIntrin::_InterlockedDecrement;
1645 case clang::AArch64::BI_InterlockedIncrement64:
1646 return MSVCIntrin::_InterlockedIncrement;
1647 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1648 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1649 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1650 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1651 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1652 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1653 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1654 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1655 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1656 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1657 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1658 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1659 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1660 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1661 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1662 case clang::AArch64::BI_InterlockedExchange8_acq:
1663 case clang::AArch64::BI_InterlockedExchange16_acq:
1664 case clang::AArch64::BI_InterlockedExchange_acq:
1665 case clang::AArch64::BI_InterlockedExchange64_acq:
1666 return MSVCIntrin::_InterlockedExchange_acq;
1667 case clang::AArch64::BI_InterlockedExchange8_rel:
1668 case clang::AArch64::BI_InterlockedExchange16_rel:
1669 case clang::AArch64::BI_InterlockedExchange_rel:
1670 case clang::AArch64::BI_InterlockedExchange64_rel:
1671 return MSVCIntrin::_InterlockedExchange_rel;
1672 case clang::AArch64::BI_InterlockedExchange8_nf:
1673 case clang::AArch64::BI_InterlockedExchange16_nf:
1674 case clang::AArch64::BI_InterlockedExchange_nf:
1675 case clang::AArch64::BI_InterlockedExchange64_nf:
1676 return MSVCIntrin::_InterlockedExchange_nf;
1677 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1678 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1679 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1680 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1681 return MSVCIntrin::_InterlockedCompareExchange_acq;
1682 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1683 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1684 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1685 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1686 return MSVCIntrin::_InterlockedCompareExchange_rel;
1687 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1688 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1689 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1690 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1691 return MSVCIntrin::_InterlockedCompareExchange_nf;
1692 case clang::AArch64::BI_InterlockedCompareExchange128:
1693 return MSVCIntrin::_InterlockedCompareExchange128;
1694 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1695 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1696 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1697 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1698 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1699 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1700 case clang::AArch64::BI_InterlockedOr8_acq:
1701 case clang::AArch64::BI_InterlockedOr16_acq:
1702 case clang::AArch64::BI_InterlockedOr_acq:
1703 case clang::AArch64::BI_InterlockedOr64_acq:
1704 return MSVCIntrin::_InterlockedOr_acq;
1705 case clang::AArch64::BI_InterlockedOr8_rel:
1706 case clang::AArch64::BI_InterlockedOr16_rel:
1707 case clang::AArch64::BI_InterlockedOr_rel:
1708 case clang::AArch64::BI_InterlockedOr64_rel:
1709 return MSVCIntrin::_InterlockedOr_rel;
1710 case clang::AArch64::BI_InterlockedOr8_nf:
1711 case clang::AArch64::BI_InterlockedOr16_nf:
1712 case clang::AArch64::BI_InterlockedOr_nf:
1713 case clang::AArch64::BI_InterlockedOr64_nf:
1714 return MSVCIntrin::_InterlockedOr_nf;
1715 case clang::AArch64::BI_InterlockedXor8_acq:
1716 case clang::AArch64::BI_InterlockedXor16_acq:
1717 case clang::AArch64::BI_InterlockedXor_acq:
1718 case clang::AArch64::BI_InterlockedXor64_acq:
1719 return MSVCIntrin::_InterlockedXor_acq;
1720 case clang::AArch64::BI_InterlockedXor8_rel:
1721 case clang::AArch64::BI_InterlockedXor16_rel:
1722 case clang::AArch64::BI_InterlockedXor_rel:
1723 case clang::AArch64::BI_InterlockedXor64_rel:
1724 return MSVCIntrin::_InterlockedXor_rel;
1725 case clang::AArch64::BI_InterlockedXor8_nf:
1726 case clang::AArch64::BI_InterlockedXor16_nf:
1727 case clang::AArch64::BI_InterlockedXor_nf:
1728 case clang::AArch64::BI_InterlockedXor64_nf:
1729 return MSVCIntrin::_InterlockedXor_nf;
1730 case clang::AArch64::BI_InterlockedAnd8_acq:
1731 case clang::AArch64::BI_InterlockedAnd16_acq:
1732 case clang::AArch64::BI_InterlockedAnd_acq:
1733 case clang::AArch64::BI_InterlockedAnd64_acq:
1734 return MSVCIntrin::_InterlockedAnd_acq;
1735 case clang::AArch64::BI_InterlockedAnd8_rel:
1736 case clang::AArch64::BI_InterlockedAnd16_rel:
1737 case clang::AArch64::BI_InterlockedAnd_rel:
1738 case clang::AArch64::BI_InterlockedAnd64_rel:
1739 return MSVCIntrin::_InterlockedAnd_rel;
1740 case clang::AArch64::BI_InterlockedAnd8_nf:
1741 case clang::AArch64::BI_InterlockedAnd16_nf:
1742 case clang::AArch64::BI_InterlockedAnd_nf:
1743 case clang::AArch64::BI_InterlockedAnd64_nf:
1744 return MSVCIntrin::_InterlockedAnd_nf;
1745 case clang::AArch64::BI_InterlockedIncrement16_acq:
1746 case clang::AArch64::BI_InterlockedIncrement_acq:
1747 case clang::AArch64::BI_InterlockedIncrement64_acq:
1748 return MSVCIntrin::_InterlockedIncrement_acq;
1749 case clang::AArch64::BI_InterlockedIncrement16_rel:
1750 case clang::AArch64::BI_InterlockedIncrement_rel:
1751 case clang::AArch64::BI_InterlockedIncrement64_rel:
1752 return MSVCIntrin::_InterlockedIncrement_rel;
1753 case clang::AArch64::BI_InterlockedIncrement16_nf:
1754 case clang::AArch64::BI_InterlockedIncrement_nf:
1755 case clang::AArch64::BI_InterlockedIncrement64_nf:
1756 return MSVCIntrin::_InterlockedIncrement_nf;
1757 case clang::AArch64::BI_InterlockedDecrement16_acq:
1758 case clang::AArch64::BI_InterlockedDecrement_acq:
1759 case clang::AArch64::BI_InterlockedDecrement64_acq:
1760 return MSVCIntrin::_InterlockedDecrement_acq;
1761 case clang::AArch64::BI_InterlockedDecrement16_rel:
1762 case clang::AArch64::BI_InterlockedDecrement_rel:
1763 case clang::AArch64::BI_InterlockedDecrement64_rel:
1764 return MSVCIntrin::_InterlockedDecrement_rel;
1765 case clang::AArch64::BI_InterlockedDecrement16_nf:
1766 case clang::AArch64::BI_InterlockedDecrement_nf:
1767 case clang::AArch64::BI_InterlockedDecrement64_nf:
1768 return MSVCIntrin::_InterlockedDecrement_nf;
1769 }
1770 llvm_unreachable("must return from switch");
1771}
1772
1773static std::optional<CodeGenFunction::MSVCIntrin>
1774translateX86ToMsvcIntrin(unsigned BuiltinID) {
1775 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1776 switch (BuiltinID) {
1777 default:
1778 return std::nullopt;
1779 case clang::X86::BI_BitScanForward:
1780 case clang::X86::BI_BitScanForward64:
1781 return MSVCIntrin::_BitScanForward;
1782 case clang::X86::BI_BitScanReverse:
1783 case clang::X86::BI_BitScanReverse64:
1784 return MSVCIntrin::_BitScanReverse;
1785 case clang::X86::BI_InterlockedAnd64:
1786 return MSVCIntrin::_InterlockedAnd;
1787 case clang::X86::BI_InterlockedCompareExchange128:
1788 return MSVCIntrin::_InterlockedCompareExchange128;
1789 case clang::X86::BI_InterlockedExchange64:
1790 return MSVCIntrin::_InterlockedExchange;
1791 case clang::X86::BI_InterlockedExchangeAdd64:
1792 return MSVCIntrin::_InterlockedExchangeAdd;
1793 case clang::X86::BI_InterlockedExchangeSub64:
1794 return MSVCIntrin::_InterlockedExchangeSub;
1795 case clang::X86::BI_InterlockedOr64:
1796 return MSVCIntrin::_InterlockedOr;
1797 case clang::X86::BI_InterlockedXor64:
1798 return MSVCIntrin::_InterlockedXor;
1799 case clang::X86::BI_InterlockedDecrement64:
1800 return MSVCIntrin::_InterlockedDecrement;
1801 case clang::X86::BI_InterlockedIncrement64:
1802 return MSVCIntrin::_InterlockedIncrement;
1803 }
1804 llvm_unreachable("must return from switch");
1805}
1806
1807// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1808Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1809 const CallExpr *E) {
1810 switch (BuiltinID) {
1811 case MSVCIntrin::_BitScanForward:
1812 case MSVCIntrin::_BitScanReverse: {
1813 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1814 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1815
1816 llvm::Type *ArgType = ArgValue->getType();
1817 llvm::Type *IndexType = IndexAddress.getElementType();
1818 llvm::Type *ResultType = ConvertType(E->getType());
1819
1820 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1821 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1822 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1823
1824 BasicBlock *Begin = Builder.GetInsertBlock();
1825 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1826 Builder.SetInsertPoint(End);
1827 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1828
1829 Builder.SetInsertPoint(Begin);
1830 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1831 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1832 Builder.CreateCondBr(IsZero, End, NotZero);
1833 Result->addIncoming(ResZero, Begin);
1834
1835 Builder.SetInsertPoint(NotZero);
1836
1837 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1838 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1839 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1840 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1841 Builder.CreateStore(ZeroCount, IndexAddress, false);
1842 } else {
1843 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1844 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1845
1846 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1847 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1848 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1849 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1850 Builder.CreateStore(Index, IndexAddress, false);
1851 }
1852 Builder.CreateBr(End);
1853 Result->addIncoming(ResOne, NotZero);
1854
1855 Builder.SetInsertPoint(End);
1856 return Result;
1857 }
1858 case MSVCIntrin::_InterlockedAnd:
1859 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1860 case MSVCIntrin::_InterlockedExchange:
1861 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1862 case MSVCIntrin::_InterlockedExchangeAdd:
1863 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1864 case MSVCIntrin::_InterlockedExchangeSub:
1865 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1866 case MSVCIntrin::_InterlockedOr:
1867 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1868 case MSVCIntrin::_InterlockedXor:
1869 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1870 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1871 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1872 AtomicOrdering::Acquire);
1873 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1874 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1875 AtomicOrdering::Release);
1876 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1877 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1878 AtomicOrdering::Monotonic);
1879 case MSVCIntrin::_InterlockedExchange_acq:
1880 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1881 AtomicOrdering::Acquire);
1882 case MSVCIntrin::_InterlockedExchange_rel:
1883 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1884 AtomicOrdering::Release);
1885 case MSVCIntrin::_InterlockedExchange_nf:
1886 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1887 AtomicOrdering::Monotonic);
1888 case MSVCIntrin::_InterlockedCompareExchange_acq:
1889 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1890 case MSVCIntrin::_InterlockedCompareExchange_rel:
1891 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1892 case MSVCIntrin::_InterlockedCompareExchange_nf:
1893 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1894 case MSVCIntrin::_InterlockedCompareExchange128:
 1895 return EmitAtomicCmpXchg128ForMSIntrin(
 1896 *this, E, AtomicOrdering::SequentiallyConsistent);
1897 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1898 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1899 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1900 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1901 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1902 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1903 case MSVCIntrin::_InterlockedOr_acq:
1904 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1905 AtomicOrdering::Acquire);
1906 case MSVCIntrin::_InterlockedOr_rel:
1907 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1908 AtomicOrdering::Release);
1909 case MSVCIntrin::_InterlockedOr_nf:
1910 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1911 AtomicOrdering::Monotonic);
1912 case MSVCIntrin::_InterlockedXor_acq:
1913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1914 AtomicOrdering::Acquire);
1915 case MSVCIntrin::_InterlockedXor_rel:
1916 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1917 AtomicOrdering::Release);
1918 case MSVCIntrin::_InterlockedXor_nf:
1919 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1920 AtomicOrdering::Monotonic);
1921 case MSVCIntrin::_InterlockedAnd_acq:
1922 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1923 AtomicOrdering::Acquire);
1924 case MSVCIntrin::_InterlockedAnd_rel:
1925 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1926 AtomicOrdering::Release);
1927 case MSVCIntrin::_InterlockedAnd_nf:
1928 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1929 AtomicOrdering::Monotonic);
1930 case MSVCIntrin::_InterlockedIncrement_acq:
1931 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1932 case MSVCIntrin::_InterlockedIncrement_rel:
1933 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1934 case MSVCIntrin::_InterlockedIncrement_nf:
1935 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1936 case MSVCIntrin::_InterlockedDecrement_acq:
1937 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1938 case MSVCIntrin::_InterlockedDecrement_rel:
1939 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1940 case MSVCIntrin::_InterlockedDecrement_nf:
1941 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1942
1943 case MSVCIntrin::_InterlockedDecrement:
1944 return EmitAtomicDecrementValue(*this, E);
1945 case MSVCIntrin::_InterlockedIncrement:
1946 return EmitAtomicIncrementValue(*this, E);
1947
1948 case MSVCIntrin::__fastfail: {
1949 // Request immediate process termination from the kernel. The instruction
1950 // sequences to do this are documented on MSDN:
1951 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
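 // Illustrative note (editorial, not part of CGBuiltin.cpp): a call such as
 //   __fastfail(7);   // e.g. FAST_FAIL_FATAL_APP_EXIT
 // is lowered below to a noreturn inline-asm call that places the fail code
 // in the register named by Constraints (ECX on x86/x86-64, r0 on Thumb,
 // w0 on AArch64) and then executes the architecture-specific trap.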
1952 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1953 StringRef Asm, Constraints;
1954 switch (ISA) {
1955 default:
1956 ErrorUnsupported(E, "__fastfail call for this architecture");
1957 break;
1958 case llvm::Triple::x86:
1959 case llvm::Triple::x86_64:
1960 Asm = "int $$0x29";
1961 Constraints = "{cx}";
1962 break;
1963 case llvm::Triple::thumb:
1964 Asm = "udf #251";
1965 Constraints = "{r0}";
1966 break;
1967 case llvm::Triple::aarch64:
1968 Asm = "brk #0xF003";
1969 Constraints = "{w0}";
1970 }
1971 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1972 llvm::InlineAsm *IA =
1973 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1974 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1975 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1976 llvm::Attribute::NoReturn);
1977 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1978 CI->setAttributes(NoReturnAttr);
1979 return CI;
1980 }
1981 }
1982 llvm_unreachable("Incorrect MSVC intrinsic!");
1983}
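// Illustrative note (editorial, not part of CGBuiltin.cpp): for example, an
// AArch64 call to _InterlockedExchangeAdd_acq(&v, 1) reaches this function as
// MSVCIntrin::_InterlockedExchangeAdd_acq and is emitted via
// MakeBinaryAtomicValue as roughly
//   %old = atomicrmw add ptr %v, i32 1 acquire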
1984
1985namespace {
1986// ARC cleanup for __builtin_os_log_format
1987struct CallObjCArcUse final : EHScopeStack::Cleanup {
1988 CallObjCArcUse(llvm::Value *object) : object(object) {}
1989 llvm::Value *object;
1990
1991 void Emit(CodeGenFunction &CGF, Flags flags) override {
1992 CGF.EmitARCIntrinsicUse(object);
1993 }
1994};
1995}
1996
1997Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
1998 BuiltinCheckKind Kind) {
1999 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2000 && "Unsupported builtin check kind");
2001
2002 Value *ArgValue = EmitScalarExpr(E);
2003 if (!SanOpts.has(SanitizerKind::Builtin))
2004 return ArgValue;
2005
2006 SanitizerScope SanScope(this);
2007 Value *Cond = Builder.CreateICmpNE(
2008 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2009 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2010 SanitizerHandler::InvalidBuiltin,
 2011 {EmitCheckSourceLocation(E->getExprLoc()),
 2012 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2013 std::nullopt);
2014 return ArgValue;
2015}
2016
2017static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2018 return CGF.Builder.CreateBinaryIntrinsic(
2019 Intrinsic::abs, ArgValue,
2020 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2021}
2022
2023static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2024 bool SanitizeOverflow) {
2025 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2026
2027 // Try to eliminate overflow check.
2028 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2029 if (!VCI->isMinSignedValue())
2030 return EmitAbs(CGF, ArgValue, true);
2031 }
2032
2033 CodeGenFunction::SanitizerScope SanScope(&CGF);
2034
2035 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2036 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2037 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2038 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2039 Value *NotOverflow = CGF.Builder.CreateNot(
2040 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2041
2042 // TODO: support -ftrapv-handler.
2043 if (SanitizeOverflow) {
2044 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2045 SanitizerHandler::NegateOverflow,
2046 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
 2047 CGF.EmitCheckTypeDescriptor(E->getType())},
 2048 {ArgValue});
2049 } else
2050 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2051
2052 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2053 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2054}
2055
2056/// Get the argument type for arguments to os_log_helper.
2057static CanQualType getOSLogArgType(ASTContext &C, int Size) {
 2058 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2059 return C.getCanonicalType(UnsignedTy);
2060}
2061
2062llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
 2063 const analyze_os_log::OSLogBufferLayout &Layout,
 2064 CharUnits BufferAlignment) {
2065 ASTContext &Ctx = getContext();
2066
 2067 llvm::SmallString<64> Name;
 2068 {
2069 raw_svector_ostream OS(Name);
2070 OS << "__os_log_helper";
2071 OS << "_" << BufferAlignment.getQuantity();
2072 OS << "_" << int(Layout.getSummaryByte());
2073 OS << "_" << int(Layout.getNumArgsByte());
2074 for (const auto &Item : Layout.Items)
2075 OS << "_" << int(Item.getSizeByte()) << "_"
2076 << int(Item.getDescriptorByte());
2077 }
2078
2079 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2080 return F;
2081
 2082 llvm::SmallVector<QualType, 4> ArgTys;
 2083 FunctionArgList Args;
2084 Args.push_back(ImplicitParamDecl::Create(
2085 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
 2086 ImplicitParamKind::Other));
 2087 ArgTys.emplace_back(Ctx.VoidPtrTy);
2088
2089 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2090 char Size = Layout.Items[I].getSizeByte();
2091 if (!Size)
2092 continue;
2093
2094 QualType ArgTy = getOSLogArgType(Ctx, Size);
2095 Args.push_back(ImplicitParamDecl::Create(
2096 Ctx, nullptr, SourceLocation(),
2097 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
 2098 ImplicitParamKind::Other));
 2099 ArgTys.emplace_back(ArgTy);
2100 }
2101
2102 QualType ReturnTy = Ctx.VoidTy;
2103
2104 // The helper function has linkonce_odr linkage to enable the linker to merge
2105 // identical functions. To ensure the merging always happens, 'noinline' is
2106 // attached to the function when compiling with -Oz.
2107 const CGFunctionInfo &FI =
 2108 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
 2109 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2110 llvm::Function *Fn = llvm::Function::Create(
2111 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2112 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2113 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
 2114 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
 2115 Fn->setDoesNotThrow();
2116
2117 // Attach 'noinline' at -Oz.
2118 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2119 Fn->addFnAttr(llvm::Attribute::NoInline);
2120
2121 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2122 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2123
2124 // Create a scope with an artificial location for the body of this function.
2125 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2126
2127 CharUnits Offset;
 2128 Address BufAddr = makeNaturalAddressForPointer(
 2129 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2130 BufferAlignment);
2131 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2132 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2133 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2134 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2135
2136 unsigned I = 1;
2137 for (const auto &Item : Layout.Items) {
 2138 Builder.CreateStore(
 2139 Builder.getInt8(Item.getDescriptorByte()),
2140 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
 2141 Builder.CreateStore(
 2142 Builder.getInt8(Item.getSizeByte()),
2143 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2144
2145 CharUnits Size = Item.size();
2146 if (!Size.getQuantity())
2147 continue;
2148
2149 Address Arg = GetAddrOfLocalVar(Args[I]);
2150 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2151 Addr = Addr.withElementType(Arg.getElementType());
 2152 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
 2153 Offset += Size;
2154 ++I;
2155 }
2156
 2157 FinishFunction();
 2158
2159 return Fn;
2160}
2161
2162RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
 2163 assert(E.getNumArgs() >= 2 &&
2164 "__builtin_os_log_format takes at least 2 arguments");
2165 ASTContext &Ctx = getContext();
 2166 analyze_os_log::OSLogBufferLayout Layout;
 2167 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
 2168 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2169 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2170
2171 // Ignore argument 1, the format string. It is not currently used.
2172 CallArgList Args;
2173 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2174
2175 for (const auto &Item : Layout.Items) {
2176 int Size = Item.getSizeByte();
2177 if (!Size)
2178 continue;
2179
2180 llvm::Value *ArgVal;
2181
2182 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2183 uint64_t Val = 0;
2184 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2185 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2186 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2187 } else if (const Expr *TheExpr = Item.getExpr()) {
2188 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2189
2190 // If a temporary object that requires destruction after the full
2191 // expression is passed, push a lifetime-extended cleanup to extend its
2192 // lifetime to the end of the enclosing block scope.
2193 auto LifetimeExtendObject = [&](const Expr *E) {
2194 E = E->IgnoreParenCasts();
2195 // Extend lifetimes of objects returned by function calls and message
2196 // sends.
2197
2198 // FIXME: We should do this in other cases in which temporaries are
2199 // created including arguments of non-ARC types (e.g., C++
2200 // temporaries).
2201 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2202 return true;
2203 return false;
2204 };
2205
2206 if (TheExpr->getType()->isObjCRetainableType() &&
2207 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2208 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2209 "Only scalar can be a ObjC retainable type");
2210 if (!isa<Constant>(ArgVal)) {
2211 CleanupKind Cleanup = getARCCleanupKind();
2212 QualType Ty = TheExpr->getType();
 2213 RawAddress Alloca = RawAddress::invalid();
 2214 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2215 ArgVal = EmitARCRetain(Ty, ArgVal);
2216 Builder.CreateStore(ArgVal, Addr);
2217 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
 2218 CodeGenFunction::destroyARCStrongPrecise,
 2219 Cleanup & EHCleanup);
2220
2221 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2222 // argument has to be alive.
2223 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2224 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2225 }
2226 }
2227 } else {
2228 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2229 }
2230
2231 unsigned ArgValSize =
2232 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2233 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2234 ArgValSize);
2235 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2236 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2237 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2238 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2239 Args.add(RValue::get(ArgVal), ArgTy);
2240 }
2241
2242 const CGFunctionInfo &FI =
 2243 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
 2244 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
 2245 Layout, BufAddr.getAlignment());
 2246 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
 2247 return RValue::get(BufAddr, *this);
2248}
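// Illustrative note (editorial, not part of CGBuiltin.cpp): for a call such as
//   __builtin_os_log_format(buf, "%d", 42);
// the code above passes the buffer pointer plus one 4-byte value argument to
// the __os_log_helper_* function generated earlier, which writes the summary
// byte, the argument-count byte, and then a descriptor byte, a size byte, and
// the raw argument data into buf.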
2249
2250static bool isSpecialUnsignedMultiplySignedResult(
 2251 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2252 WidthAndSignedness ResultInfo) {
2253 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2254 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2255 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2256}
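// Illustrative example (editorial, not part of CGBuiltin.cpp): this special
// case covers calls such as
//   unsigned a, b; int r;
//   __builtin_mul_overflow(a, b, &r);
// where both operands are unsigned, the result is signed, and all three types
// have the same width.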
2257
2258static RValue EmitCheckedUnsignedMultiplySignedResult(
 2259 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2260 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2261 const clang::Expr *ResultArg, QualType ResultQTy,
2262 WidthAndSignedness ResultInfo) {
 2263 assert(isSpecialUnsignedMultiplySignedResult(
 2264 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2265 "Cannot specialize this multiply");
2266
2267 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2268 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2269
2270 llvm::Value *HasOverflow;
2271 llvm::Value *Result = EmitOverflowIntrinsic(
2272 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2273
 2274 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2275 // however, since the original builtin had a signed result, we need to report
2276 // an overflow when the result is greater than INT_MAX.
2277 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2278 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2279
2280 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2281 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2282
2283 bool isVolatile =
2284 ResultArg->getType()->getPointeeType().isVolatileQualified();
2285 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2286 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2287 isVolatile);
2288 return RValue::get(HasOverflow);
2289}
2290
2291/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2292static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2293 WidthAndSignedness Op1Info,
2294 WidthAndSignedness Op2Info,
2295 WidthAndSignedness ResultInfo) {
2296 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2297 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2298 Op1Info.Signed != Op2Info.Signed;
2299}
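// Illustrative example (editorial, not part of CGBuiltin.cpp): the mixed-sign
// path covers calls such as
//   int s; unsigned u; int r;
//   __builtin_mul_overflow(s, u, &r);
// where exactly one operand is signed and the wider operand is at least as
// wide as the result.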
2300
2301/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2302/// the generic checked-binop irgen.
2303static RValue
2304EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
 2305 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2306 WidthAndSignedness Op2Info,
2307 const clang::Expr *ResultArg, QualType ResultQTy,
2308 WidthAndSignedness ResultInfo) {
2309 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2310 Op2Info, ResultInfo) &&
2311 "Not a mixed-sign multipliction we can specialize");
2312
2313 // Emit the signed and unsigned operands.
2314 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2315 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2316 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2317 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2318 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2319 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2320
2321 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2322 if (SignedOpWidth < UnsignedOpWidth)
2323 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2324 if (UnsignedOpWidth < SignedOpWidth)
2325 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2326
2327 llvm::Type *OpTy = Signed->getType();
2328 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2329 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2330 llvm::Type *ResTy = ResultPtr.getElementType();
2331 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2332
2333 // Take the absolute value of the signed operand.
2334 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2335 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2336 llvm::Value *AbsSigned =
2337 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2338
2339 // Perform a checked unsigned multiplication.
2340 llvm::Value *UnsignedOverflow;
2341 llvm::Value *UnsignedResult =
2342 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2343 Unsigned, UnsignedOverflow);
2344
2345 llvm::Value *Overflow, *Result;
2346 if (ResultInfo.Signed) {
 2347 // Signed overflow occurs if the result is greater than INT_MAX or less
 2348 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
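 // Editorial note (not part of CGBuiltin.cpp): for a 32-bit result, INT_MAX
 // is 2147483647 while the most negative value is -2147483648, so a negative
 // product may legitimately have magnitude INT_MAX + 1; adding IsNegative
 // (0 or 1) to INT_MAX accounts for that asymmetry.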
2349 auto IntMax =
2350 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2351 llvm::Value *MaxResult =
2352 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2353 CGF.Builder.CreateZExt(IsNegative, OpTy));
2354 llvm::Value *SignedOverflow =
2355 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2356 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2357
2358 // Prepare the signed result (possibly by negating it).
2359 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2360 llvm::Value *SignedResult =
2361 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2362 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2363 } else {
2364 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2365 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2366 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2367 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2368 if (ResultInfo.Width < OpWidth) {
2369 auto IntMax =
2370 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2371 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2372 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2373 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2374 }
2375
2376 // Negate the product if it would be negative in infinite precision.
2377 Result = CGF.Builder.CreateSelect(
2378 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2379
2380 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2381 }
2382 assert(Overflow && Result && "Missing overflow or result");
2383
2384 bool isVolatile =
2385 ResultArg->getType()->getPointeeType().isVolatileQualified();
2386 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2387 isVolatile);
2388 return RValue::get(Overflow);
2389}
2390
2391static bool
2392TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
 2393 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2394 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2395 Ty = Ctx.getBaseElementType(Arr);
2396
2397 const auto *Record = Ty->getAsCXXRecordDecl();
2398 if (!Record)
2399 return false;
2400
2401 // We've already checked this type, or are in the process of checking it.
2402 if (!Seen.insert(Record).second)
2403 return false;
2404
2405 assert(Record->hasDefinition() &&
2406 "Incomplete types should already be diagnosed");
2407
2408 if (Record->isDynamicClass())
2409 return true;
2410
2411 for (FieldDecl *F : Record->fields()) {
2412 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2413 return true;
2414 }
2415 return false;
2416}
2417
2418/// Determine if the specified type requires laundering by checking if it is a
2419/// dynamic class type or contains a subobject which is a dynamic class type.
2420static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
 2421 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2422 return false;
 2423 llvm::SmallPtrSet<const Decl *, 16> Seen;
 2424 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2425}
2426
2427RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2428 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2429 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2430
2431 // The builtin's shift arg may have a different type than the source arg and
2432 // result, but the LLVM intrinsic uses the same type for all values.
2433 llvm::Type *Ty = Src->getType();
2434 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2435
2436 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2437 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2438 Function *F = CGM.getIntrinsic(IID, Ty);
2439 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2440}
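// Illustrative note (editorial, not part of CGBuiltin.cpp): a rotate such as
//   __builtin_rotateleft32(x, n)
// is emitted above as a funnel shift with the value duplicated, roughly
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
// and the rotate-right builtins use @llvm.fshr.* in the same way.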
2441
2442// Map math builtins for long-double to f128 version.
2443static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2444 switch (BuiltinID) {
2445#define MUTATE_LDBL(func) \
2446 case Builtin::BI__builtin_##func##l: \
2447 return Builtin::BI__builtin_##func##f128;
2478 MUTATE_LDBL(nans)
2479 MUTATE_LDBL(inf)
2498 MUTATE_LDBL(huge_val)
2508#undef MUTATE_LDBL
2509 default:
2510 return BuiltinID;
2511 }
2512}
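// Illustrative example (editorial, not part of CGBuiltin.cpp): when long
// double uses the IEEE 128-bit format (see the PPC64 check below), a call to
// __builtin_infl() is remapped by this function to __builtin_inff128() before
// the normal builtin lowering runs.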
2513
2514static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2515 Value *V) {
2516 if (CGF.Builder.getIsFPConstrained() &&
2517 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2518 if (Value *Result =
2519 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2520 return Result;
2521 }
2522 return nullptr;
2523}
2524
2525static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
 2526 const FunctionDecl *FD) {
2527 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2528 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2529 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2530
 2531 SmallVector<Value *, 16> Args;
 2532 for (auto &&FormalTy : FnTy->params())
2533 Args.push_back(llvm::PoisonValue::get(FormalTy));
2534
2535 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2536}
2537
2538RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2539 const CallExpr *E,
2540 ReturnValueSlot ReturnValue) {
2541 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2542 // See if we can constant fold this builtin. If so, don't emit it at all.
2543 // TODO: Extend this handling to all builtin calls that we can constant-fold.
 2544 Expr::EvalResult Result;
 2545 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
 2546 !Result.hasSideEffects()) {
2547 if (Result.Val.isInt())
2548 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2549 Result.Val.getInt()));
2550 if (Result.Val.isFloat())
2551 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2552 Result.Val.getFloat()));
2553 }
2554
2555 // If current long-double semantics is IEEE 128-bit, replace math builtins
2556 // of long-double with f128 equivalent.
 2557 // TODO: This mutation should also be applied to targets other than PPC,
 2558 // once the backend supports IEEE 128-bit style libcalls.
2559 if (getTarget().getTriple().isPPC64() &&
2560 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2561 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2562
2563 // If the builtin has been declared explicitly with an assembler label,
2564 // disable the specialized emitting below. Ideally we should communicate the
2565 // rename in IR, or at least avoid generating the intrinsic calls that are
2566 // likely to get lowered to the renamed library functions.
2567 const unsigned BuiltinIDIfNoAsmLabel =
2568 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2569
2570 std::optional<bool> ErrnoOverriden;
 2571 // ErrnoOverriden is true if math-errno is overridden via the
2572 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2573 // which implies math-errno.
2574 if (E->hasStoredFPFeatures()) {
2575 FPOptionsOverride OP = E->getFPFeatures();
2576 if (OP.hasMathErrnoOverride())
2577 ErrnoOverriden = OP.getMathErrnoOverride();
2578 }
 2579 // True if '__attribute__((optnone))' is used. This attribute overrides
 2580 // fast-math, which implies math-errno.
2581 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2582
 2583 // True if we are compiling with optimization and errno has been disabled
 2584 // using '#pragma float_control(precise, off)', and
 2585 // '__attribute__((optnone))' hasn't been seen.
2586 bool ErrnoOverridenToFalseWithOpt =
2587 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2588 CGM.getCodeGenOpts().OptimizationLevel != 0;
2589
 2590 // There are LLVM math intrinsics/instructions corresponding to math library
 2591 // functions, except that the LLVM op will never set errno while the math library
2592 // might. Also, math builtins have the same semantics as their math library
2593 // twins. Thus, we can transform math library and builtin calls to their
2594 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2595 // In case FP exceptions are enabled, the experimental versions of the
2596 // intrinsics model those.
2597 bool ConstAlways =
2598 getContext().BuiltinInfo.isConst(BuiltinID);
2599
2600 // There's a special case with the fma builtins where they are always const
 2601 // if the target environment is GNU or the target OS is Windows and we're
 2602 // targeting the MSVCRT.dll environment.
 2603 // FIXME: This list can become outdated. We need a way to derive this
 2604 // information some other way.
2605 switch (BuiltinID) {
2606 case Builtin::BI__builtin_fma:
2607 case Builtin::BI__builtin_fmaf:
2608 case Builtin::BI__builtin_fmal:
2609 case Builtin::BI__builtin_fmaf16:
2610 case Builtin::BIfma:
2611 case Builtin::BIfmaf:
2612 case Builtin::BIfmal: {
2613 auto &Trip = CGM.getTriple();
2614 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2615 ConstAlways = true;
2616 break;
2617 }
2618 default:
2619 break;
2620 }
2621
2622 bool ConstWithoutErrnoAndExceptions =
 2623 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
 2624 bool ConstWithoutExceptions =
 2625 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
 2626
 2627 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
 2628 // disabled.
 2629 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
 2630 // or attributes that affect math-errno should prevent or allow math
 2631 // intrinsics to be generated. Intrinsics are generated:
 2632 // 1- In fast-math mode, unless math-errno is overridden
 2633 // via '#pragma float_control(precise, on)', or via
 2634 // '__attribute__((optnone))'.
 2635 // 2- If math-errno was enabled on the command line but overridden
 2636 // to false via '#pragma float_control(precise, off)' and
 2637 // '__attribute__((optnone))' hasn't been used.
 2638 // 3- If we are compiling with optimization and errno has been disabled
 2639 // via '#pragma float_control(precise, off)', and
 2640 // '__attribute__((optnone))' hasn't been used.
2641
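 // Editorial example (not part of CGBuiltin.cpp) illustrating the rules above:
 // compiling
 //   double f(double x) { return sin(x); }
 // with -ffast-math (and no overriding pragma or optnone attribute) lets the
 // BIsin case below emit @llvm.sin.f64 directly, whereas with -fmath-errno in
 // effect the call stays a libm call so errno can still be set.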
2642 bool ConstWithoutErrnoOrExceptions =
2643 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2644 bool GenerateIntrinsics =
2645 (ConstAlways && !OptNone) ||
2646 (!getLangOpts().MathErrno &&
2647 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2648 if (!GenerateIntrinsics) {
2649 GenerateIntrinsics =
2650 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2651 if (!GenerateIntrinsics)
2652 GenerateIntrinsics =
2653 ConstWithoutErrnoOrExceptions &&
2654 (!getLangOpts().MathErrno &&
2655 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2656 if (!GenerateIntrinsics)
2657 GenerateIntrinsics =
2658 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2659 }
2660 if (GenerateIntrinsics) {
2661 switch (BuiltinIDIfNoAsmLabel) {
2662 case Builtin::BIacos:
2663 case Builtin::BIacosf:
2664 case Builtin::BIacosl:
2665 case Builtin::BI__builtin_acos:
2666 case Builtin::BI__builtin_acosf:
2667 case Builtin::BI__builtin_acosf16:
2668 case Builtin::BI__builtin_acosl:
2669 case Builtin::BI__builtin_acosf128:
2671 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2672
2673 case Builtin::BIasin:
2674 case Builtin::BIasinf:
2675 case Builtin::BIasinl:
2676 case Builtin::BI__builtin_asin:
2677 case Builtin::BI__builtin_asinf:
2678 case Builtin::BI__builtin_asinf16:
2679 case Builtin::BI__builtin_asinl:
2680 case Builtin::BI__builtin_asinf128:
2682 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2683
2684 case Builtin::BIatan:
2685 case Builtin::BIatanf:
2686 case Builtin::BIatanl:
2687 case Builtin::BI__builtin_atan:
2688 case Builtin::BI__builtin_atanf:
2689 case Builtin::BI__builtin_atanf16:
2690 case Builtin::BI__builtin_atanl:
2691 case Builtin::BI__builtin_atanf128:
2693 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2694
2695 case Builtin::BIceil:
2696 case Builtin::BIceilf:
2697 case Builtin::BIceill:
2698 case Builtin::BI__builtin_ceil:
2699 case Builtin::BI__builtin_ceilf:
2700 case Builtin::BI__builtin_ceilf16:
2701 case Builtin::BI__builtin_ceill:
2702 case Builtin::BI__builtin_ceilf128:
2704 Intrinsic::ceil,
2705 Intrinsic::experimental_constrained_ceil));
2706
2707 case Builtin::BIcopysign:
2708 case Builtin::BIcopysignf:
2709 case Builtin::BIcopysignl:
2710 case Builtin::BI__builtin_copysign:
2711 case Builtin::BI__builtin_copysignf:
2712 case Builtin::BI__builtin_copysignf16:
2713 case Builtin::BI__builtin_copysignl:
2714 case Builtin::BI__builtin_copysignf128:
2715 return RValue::get(
2716 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2717
2718 case Builtin::BIcos:
2719 case Builtin::BIcosf:
2720 case Builtin::BIcosl:
2721 case Builtin::BI__builtin_cos:
2722 case Builtin::BI__builtin_cosf:
2723 case Builtin::BI__builtin_cosf16:
2724 case Builtin::BI__builtin_cosl:
2725 case Builtin::BI__builtin_cosf128:
2727 Intrinsic::cos,
2728 Intrinsic::experimental_constrained_cos));
2729
2730 case Builtin::BIcosh:
2731 case Builtin::BIcoshf:
2732 case Builtin::BIcoshl:
2733 case Builtin::BI__builtin_cosh:
2734 case Builtin::BI__builtin_coshf:
2735 case Builtin::BI__builtin_coshf16:
2736 case Builtin::BI__builtin_coshl:
2737 case Builtin::BI__builtin_coshf128:
2739 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
2740
2741 case Builtin::BIexp:
2742 case Builtin::BIexpf:
2743 case Builtin::BIexpl:
2744 case Builtin::BI__builtin_exp:
2745 case Builtin::BI__builtin_expf:
2746 case Builtin::BI__builtin_expf16:
2747 case Builtin::BI__builtin_expl:
2748 case Builtin::BI__builtin_expf128:
2750 Intrinsic::exp,
2751 Intrinsic::experimental_constrained_exp));
2752
2753 case Builtin::BIexp2:
2754 case Builtin::BIexp2f:
2755 case Builtin::BIexp2l:
2756 case Builtin::BI__builtin_exp2:
2757 case Builtin::BI__builtin_exp2f:
2758 case Builtin::BI__builtin_exp2f16:
2759 case Builtin::BI__builtin_exp2l:
2760 case Builtin::BI__builtin_exp2f128:
2762 Intrinsic::exp2,
2763 Intrinsic::experimental_constrained_exp2));
2764 case Builtin::BI__builtin_exp10:
2765 case Builtin::BI__builtin_exp10f:
2766 case Builtin::BI__builtin_exp10f16:
2767 case Builtin::BI__builtin_exp10l:
2768 case Builtin::BI__builtin_exp10f128: {
2769 // TODO: strictfp support
2770 if (Builder.getIsFPConstrained())
2771 break;
2772 return RValue::get(
2773 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
2774 }
2775 case Builtin::BIfabs:
2776 case Builtin::BIfabsf:
2777 case Builtin::BIfabsl:
2778 case Builtin::BI__builtin_fabs:
2779 case Builtin::BI__builtin_fabsf:
2780 case Builtin::BI__builtin_fabsf16:
2781 case Builtin::BI__builtin_fabsl:
2782 case Builtin::BI__builtin_fabsf128:
2783 return RValue::get(
2784 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
2785
2786 case Builtin::BIfloor:
2787 case Builtin::BIfloorf:
2788 case Builtin::BIfloorl:
2789 case Builtin::BI__builtin_floor:
2790 case Builtin::BI__builtin_floorf:
2791 case Builtin::BI__builtin_floorf16:
2792 case Builtin::BI__builtin_floorl:
2793 case Builtin::BI__builtin_floorf128:
2795 Intrinsic::floor,
2796 Intrinsic::experimental_constrained_floor));
2797
2798 case Builtin::BIfma:
2799 case Builtin::BIfmaf:
2800 case Builtin::BIfmal:
2801 case Builtin::BI__builtin_fma:
2802 case Builtin::BI__builtin_fmaf:
2803 case Builtin::BI__builtin_fmaf16:
2804 case Builtin::BI__builtin_fmal:
2805 case Builtin::BI__builtin_fmaf128:
2807 Intrinsic::fma,
2808 Intrinsic::experimental_constrained_fma));
2809
2810 case Builtin::BIfmax:
2811 case Builtin::BIfmaxf:
2812 case Builtin::BIfmaxl:
2813 case Builtin::BI__builtin_fmax:
2814 case Builtin::BI__builtin_fmaxf:
2815 case Builtin::BI__builtin_fmaxf16:
2816 case Builtin::BI__builtin_fmaxl:
2817 case Builtin::BI__builtin_fmaxf128:
2819 Intrinsic::maxnum,
2820 Intrinsic::experimental_constrained_maxnum));
2821
2822 case Builtin::BIfmin:
2823 case Builtin::BIfminf:
2824 case Builtin::BIfminl:
2825 case Builtin::BI__builtin_fmin:
2826 case Builtin::BI__builtin_fminf:
2827 case Builtin::BI__builtin_fminf16:
2828 case Builtin::BI__builtin_fminl:
2829 case Builtin::BI__builtin_fminf128:
2831 Intrinsic::minnum,
2832 Intrinsic::experimental_constrained_minnum));
2833
2834 // fmod() is a special-case. It maps to the frem instruction rather than an
2835 // LLVM intrinsic.
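 // Editorial note (not part of CGBuiltin.cpp): e.g. fmod(x, y) on doubles is
 // emitted below as roughly
 //   %fmod = frem double %x, %y
 // within the CGFPOptionsRAII scope set up for that case.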
2836 case Builtin::BIfmod:
2837 case Builtin::BIfmodf:
2838 case Builtin::BIfmodl:
2839 case Builtin::BI__builtin_fmod:
2840 case Builtin::BI__builtin_fmodf:
2841 case Builtin::BI__builtin_fmodf16:
2842 case Builtin::BI__builtin_fmodl:
2843 case Builtin::BI__builtin_fmodf128: {
2844 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2845 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2846 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2847 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2848 }
2849
2850 case Builtin::BIlog:
2851 case Builtin::BIlogf:
2852 case Builtin::BIlogl:
2853 case Builtin::BI__builtin_log:
2854 case Builtin::BI__builtin_logf:
2855 case Builtin::BI__builtin_logf16:
2856 case Builtin::BI__builtin_logl:
2857 case Builtin::BI__builtin_logf128:
2859 Intrinsic::log,
2860 Intrinsic::experimental_constrained_log));
2861
2862 case Builtin::BIlog10:
2863 case Builtin::BIlog10f:
2864 case Builtin::BIlog10l:
2865 case Builtin::BI__builtin_log10:
2866 case Builtin::BI__builtin_log10f:
2867 case Builtin::BI__builtin_log10f16:
2868 case Builtin::BI__builtin_log10l:
2869 case Builtin::BI__builtin_log10f128:
2871 Intrinsic::log10,
2872 Intrinsic::experimental_constrained_log10));
2873
2874 case Builtin::BIlog2:
2875 case Builtin::BIlog2f:
2876 case Builtin::BIlog2l:
2877 case Builtin::BI__builtin_log2:
2878 case Builtin::BI__builtin_log2f:
2879 case Builtin::BI__builtin_log2f16:
2880 case Builtin::BI__builtin_log2l:
2881 case Builtin::BI__builtin_log2f128:
2883 Intrinsic::log2,
2884 Intrinsic::experimental_constrained_log2));
2885
2886 case Builtin::BInearbyint:
2887 case Builtin::BInearbyintf:
2888 case Builtin::BInearbyintl:
2889 case Builtin::BI__builtin_nearbyint:
2890 case Builtin::BI__builtin_nearbyintf:
2891 case Builtin::BI__builtin_nearbyintl:
2892 case Builtin::BI__builtin_nearbyintf128:
2894 Intrinsic::nearbyint,
2895 Intrinsic::experimental_constrained_nearbyint));
2896
2897 case Builtin::BIpow:
2898 case Builtin::BIpowf:
2899 case Builtin::BIpowl:
2900 case Builtin::BI__builtin_pow:
2901 case Builtin::BI__builtin_powf:
2902 case Builtin::BI__builtin_powf16:
2903 case Builtin::BI__builtin_powl:
2904 case Builtin::BI__builtin_powf128:
2906 Intrinsic::pow,
2907 Intrinsic::experimental_constrained_pow));
2908
2909 case Builtin::BIrint:
2910 case Builtin::BIrintf:
2911 case Builtin::BIrintl:
2912 case Builtin::BI__builtin_rint:
2913 case Builtin::BI__builtin_rintf:
2914 case Builtin::BI__builtin_rintf16:
2915 case Builtin::BI__builtin_rintl:
2916 case Builtin::BI__builtin_rintf128:
2918 Intrinsic::rint,
2919 Intrinsic::experimental_constrained_rint));
2920
2921 case Builtin::BIround:
2922 case Builtin::BIroundf:
2923 case Builtin::BIroundl:
2924 case Builtin::BI__builtin_round:
2925 case Builtin::BI__builtin_roundf:
2926 case Builtin::BI__builtin_roundf16:
2927 case Builtin::BI__builtin_roundl:
2928 case Builtin::BI__builtin_roundf128:
2930 Intrinsic::round,
2931 Intrinsic::experimental_constrained_round));
2932
2933 case Builtin::BIroundeven:
2934 case Builtin::BIroundevenf:
2935 case Builtin::BIroundevenl:
2936 case Builtin::BI__builtin_roundeven:
2937 case Builtin::BI__builtin_roundevenf:
2938 case Builtin::BI__builtin_roundevenf16:
2939 case Builtin::BI__builtin_roundevenl:
2940 case Builtin::BI__builtin_roundevenf128:
2942 Intrinsic::roundeven,
2943 Intrinsic::experimental_constrained_roundeven));
2944
2945 case Builtin::BIsin:
2946 case Builtin::BIsinf:
2947 case Builtin::BIsinl:
2948 case Builtin::BI__builtin_sin:
2949 case Builtin::BI__builtin_sinf:
2950 case Builtin::BI__builtin_sinf16:
2951 case Builtin::BI__builtin_sinl:
2952 case Builtin::BI__builtin_sinf128:
2954 Intrinsic::sin,
2955 Intrinsic::experimental_constrained_sin));
2956
2957 case Builtin::BIsinh:
2958 case Builtin::BIsinhf:
2959 case Builtin::BIsinhl:
2960 case Builtin::BI__builtin_sinh:
2961 case Builtin::BI__builtin_sinhf:
2962 case Builtin::BI__builtin_sinhf16:
2963 case Builtin::BI__builtin_sinhl:
2964 case Builtin::BI__builtin_sinhf128:
2966 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
2967
2968 case Builtin::BIsqrt:
2969 case Builtin::BIsqrtf:
2970 case Builtin::BIsqrtl:
2971 case Builtin::BI__builtin_sqrt:
2972 case Builtin::BI__builtin_sqrtf:
2973 case Builtin::BI__builtin_sqrtf16:
2974 case Builtin::BI__builtin_sqrtl:
2975 case Builtin::BI__builtin_sqrtf128:
2976 case Builtin::BI__builtin_elementwise_sqrt: {
2978 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2980 return RValue::get(Call);
2981 }
2982
2983 case Builtin::BItan:
2984 case Builtin::BItanf:
2985 case Builtin::BItanl:
2986 case Builtin::BI__builtin_tan:
2987 case Builtin::BI__builtin_tanf:
2988 case Builtin::BI__builtin_tanf16:
2989 case Builtin::BI__builtin_tanl:
2990 case Builtin::BI__builtin_tanf128:
2992 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
2993
2994 case Builtin::BItanh:
2995 case Builtin::BItanhf:
2996 case Builtin::BItanhl:
2997 case Builtin::BI__builtin_tanh:
2998 case Builtin::BI__builtin_tanhf:
2999 case Builtin::BI__builtin_tanhf16:
3000 case Builtin::BI__builtin_tanhl:
3001 case Builtin::BI__builtin_tanhf128:
3003 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3004
3005 case Builtin::BItrunc:
3006 case Builtin::BItruncf:
3007 case Builtin::BItruncl:
3008 case Builtin::BI__builtin_trunc:
3009 case Builtin::BI__builtin_truncf:
3010 case Builtin::BI__builtin_truncf16:
3011 case Builtin::BI__builtin_truncl:
3012 case Builtin::BI__builtin_truncf128:
3014 Intrinsic::trunc,
3015 Intrinsic::experimental_constrained_trunc));
3016
3017 case Builtin::BIlround:
3018 case Builtin::BIlroundf:
3019 case Builtin::BIlroundl:
3020 case Builtin::BI__builtin_lround:
3021 case Builtin::BI__builtin_lroundf:
3022 case Builtin::BI__builtin_lroundl:
3023 case Builtin::BI__builtin_lroundf128:
3025 *this, E, Intrinsic::lround,
3026 Intrinsic::experimental_constrained_lround));
3027
3028 case Builtin::BIllround:
3029 case Builtin::BIllroundf:
3030 case Builtin::BIllroundl:
3031 case Builtin::BI__builtin_llround:
3032 case Builtin::BI__builtin_llroundf:
3033 case Builtin::BI__builtin_llroundl:
3034 case Builtin::BI__builtin_llroundf128:
3036 *this, E, Intrinsic::llround,
3037 Intrinsic::experimental_constrained_llround));
3038
3039 case Builtin::BIlrint:
3040 case Builtin::BIlrintf:
3041 case Builtin::BIlrintl:
3042 case Builtin::BI__builtin_lrint:
3043 case Builtin::BI__builtin_lrintf:
3044 case Builtin::BI__builtin_lrintl:
3045 case Builtin::BI__builtin_lrintf128:
3047 *this, E, Intrinsic::lrint,
3048 Intrinsic::experimental_constrained_lrint));
3049
3050 case Builtin::BIllrint:
3051 case Builtin::BIllrintf:
3052 case Builtin::BIllrintl:
3053 case Builtin::BI__builtin_llrint:
3054 case Builtin::BI__builtin_llrintf:
3055 case Builtin::BI__builtin_llrintl:
3056 case Builtin::BI__builtin_llrintf128:
3057 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3058 *this, E, Intrinsic::llrint,
3059 Intrinsic::experimental_constrained_llrint));
3060 case Builtin::BI__builtin_ldexp:
3061 case Builtin::BI__builtin_ldexpf:
3062 case Builtin::BI__builtin_ldexpl:
3063 case Builtin::BI__builtin_ldexpf16:
3064 case Builtin::BI__builtin_ldexpf128: {
3065 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3066 *this, E, Intrinsic::ldexp,
3067 Intrinsic::experimental_constrained_ldexp));
3068 }
3069 default:
3070 break;
3071 }
3072 }
3073
3074 // Check NonnullAttribute/NullabilityArg and Alignment.
3075 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3076 unsigned ParmNum) {
3077 Value *Val = A.emitRawPointer(*this);
3078 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3079 ParmNum);
3080
3081 if (SanOpts.has(SanitizerKind::Alignment)) {
3082 SanitizerSet SkippedChecks;
3083 SkippedChecks.set(SanitizerKind::All);
3084 SkippedChecks.clear(SanitizerKind::Alignment);
3085 SourceLocation Loc = Arg->getExprLoc();
3086 // Strip an implicit cast.
3087 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3088 if (CE->getCastKind() == CK_BitCast)
3089 Arg = CE->getSubExpr();
3090 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3091 SkippedChecks);
3092 }
3093 };
3094
3095 switch (BuiltinIDIfNoAsmLabel) {
3096 default: break;
3097 case Builtin::BI__builtin___CFStringMakeConstantString:
3098 case Builtin::BI__builtin___NSStringMakeConstantString:
3099 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3100 case Builtin::BI__builtin_stdarg_start:
3101 case Builtin::BI__builtin_va_start:
3102 case Builtin::BI__va_start:
3103 case Builtin::BI__builtin_va_end:
3104 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3105 ? EmitScalarExpr(E->getArg(0))
3106 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3107 BuiltinID != Builtin::BI__builtin_va_end);
3108 return RValue::get(nullptr);
3109 case Builtin::BI__builtin_va_copy: {
3110 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3111 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3112 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3113 {DstPtr, SrcPtr});
3114 return RValue::get(nullptr);
3115 }
3116 case Builtin::BIabs:
3117 case Builtin::BIlabs:
3118 case Builtin::BIllabs:
3119 case Builtin::BI__builtin_abs:
3120 case Builtin::BI__builtin_labs:
3121 case Builtin::BI__builtin_llabs: {
3122 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3123
3124 Value *Result;
3125 switch (getLangOpts().getSignedOverflowBehavior()) {
3126 case LangOptions::SOB_Defined:
3127 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3128 break;
3129 case LangOptions::SOB_Undefined:
3130 if (!SanitizeOverflow) {
3131 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3132 break;
3133 }
3134 [[fallthrough]];
3135 case LangOptions::SOB_Trapping:
3136 // TODO: Somehow handle the corner case when the address of abs is taken.
3137 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3138 break;
3139 }
3140 return RValue::get(Result);
3141 }
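// For illustration (a sketch, not part of the original listing): with -fwrapv
// (SOB_Defined) abs(INT_MIN) wraps and the call lowers to
//   %0 = call i32 @llvm.abs.i32(i32 %x, i1 false)
// with the default undefined-overflow semantics the i1 poison flag is true,
// and with -ftrapv or -fsanitize=signed-integer-overflow an explicit overflow
// check is emitted instead via EmitOverflowCheckedAbs above.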
3142 case Builtin::BI__builtin_complex: {
3143 Value *Real = EmitScalarExpr(E->getArg(0));
3144 Value *Imag = EmitScalarExpr(E->getArg(1));
3145 return RValue::getComplex({Real, Imag});
3146 }
3147 case Builtin::BI__builtin_conj:
3148 case Builtin::BI__builtin_conjf:
3149 case Builtin::BI__builtin_conjl:
3150 case Builtin::BIconj:
3151 case Builtin::BIconjf:
3152 case Builtin::BIconjl: {
3153 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3154 Value *Real = ComplexVal.first;
3155 Value *Imag = ComplexVal.second;
3156 Imag = Builder.CreateFNeg(Imag, "neg");
3157 return RValue::getComplex(std::make_pair(Real, Imag));
3158 }
3159 case Builtin::BI__builtin_creal:
3160 case Builtin::BI__builtin_crealf:
3161 case Builtin::BI__builtin_creall:
3162 case Builtin::BIcreal:
3163 case Builtin::BIcrealf:
3164 case Builtin::BIcreall: {
3165 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3166 return RValue::get(ComplexVal.first);
3167 }
3168
3169 case Builtin::BI__builtin_preserve_access_index: {
3170 // Only enable the preserved-access-index region when debug info
3171 // is available, as debug info is needed to preserve the user-level
3172 // access pattern.
3173 if (!getDebugInfo()) {
3174 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3175 return RValue::get(EmitScalarExpr(E->getArg(0)));
3176 }
3177
3178 // Nested builtin_preserve_access_index() not supported
3179 if (IsInPreservedAIRegion) {
3180 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3181 return RValue::get(EmitScalarExpr(E->getArg(0)));
3182 }
3183
3184 IsInPreservedAIRegion = true;
3185 Value *Res = EmitScalarExpr(E->getArg(0));
3186 IsInPreservedAIRegion = false;
3187 return RValue::get(Res);
3188 }
3189
3190 case Builtin::BI__builtin_cimag:
3191 case Builtin::BI__builtin_cimagf:
3192 case Builtin::BI__builtin_cimagl:
3193 case Builtin::BIcimag:
3194 case Builtin::BIcimagf:
3195 case Builtin::BIcimagl: {
3196 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3197 return RValue::get(ComplexVal.second);
3198 }
3199
3200 case Builtin::BI__builtin_clrsb:
3201 case Builtin::BI__builtin_clrsbl:
3202 case Builtin::BI__builtin_clrsbll: {
3203 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3204 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3205
3206 llvm::Type *ArgType = ArgValue->getType();
3207 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3208
3209 llvm::Type *ResultType = ConvertType(E->getType());
3210 Value *Zero = llvm::Constant::getNullValue(ArgType);
3211 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3212 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3213 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3214 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3215 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3216 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3217 "cast");
3218 return RValue::get(Result);
3219 }
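// Worked example (illustrative): for a 32-bit int, clrsb(1) keeps x itself
// (x >= 0), ctlz(1) == 31, so the result is 31 - 1 == 30 redundant sign bits;
// clrsb(-1) first inverts to 0, and because ctlz is called with
// is_zero_poison == false, ctlz(0) == 32 and the result is 31.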
3220 case Builtin::BI__builtin_ctzs:
3221 case Builtin::BI__builtin_ctz:
3222 case Builtin::BI__builtin_ctzl:
3223 case Builtin::BI__builtin_ctzll:
3224 case Builtin::BI__builtin_ctzg: {
3225 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3226 E->getNumArgs() > 1;
3227
3228 Value *ArgValue =
3229 HasFallback ? EmitScalarExpr(E->getArg(0))
3230 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3231
3232 llvm::Type *ArgType = ArgValue->getType();
3233 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3234
3235 llvm::Type *ResultType = ConvertType(E->getType());
3236 Value *ZeroUndef =
3237 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3238 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3239 if (Result->getType() != ResultType)
3240 Result =
3241 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3242 if (!HasFallback)
3243 return RValue::get(Result);
3244
3245 Value *Zero = Constant::getNullValue(ArgType);
3246 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3247 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3248 Value *ResultOrFallback =
3249 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3250 return RValue::get(ResultOrFallback);
3251 }
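// For illustration (a sketch, not part of the original listing):
// __builtin_ctzg(x, 32) emits @llvm.cttz.i32(i32 %x, i1 true) followed by an
// icmp eq/select so the fallback value 32 is returned when x == 0; the plain
// __builtin_ctz forms skip the select entirely.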
3252 case Builtin::BI__builtin_clzs:
3253 case Builtin::BI__builtin_clz:
3254 case Builtin::BI__builtin_clzl:
3255 case Builtin::BI__builtin_clzll:
3256 case Builtin::BI__builtin_clzg: {
3257 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3258 E->getNumArgs() > 1;
3259
3260 Value *ArgValue =
3261 HasFallback ? EmitScalarExpr(E->getArg(0))
3262 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3263
3264 llvm::Type *ArgType = ArgValue->getType();
3265 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3266
3267 llvm::Type *ResultType = ConvertType(E->getType());
3268 Value *ZeroUndef =
3269 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3270 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3271 if (Result->getType() != ResultType)
3272 Result =
3273 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3274 if (!HasFallback)
3275 return RValue::get(Result);
3276
3277 Value *Zero = Constant::getNullValue(ArgType);
3278 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3279 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3280 Value *ResultOrFallback =
3281 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3282 return RValue::get(ResultOrFallback);
3283 }
3284 case Builtin::BI__builtin_ffs:
3285 case Builtin::BI__builtin_ffsl:
3286 case Builtin::BI__builtin_ffsll: {
3287 // ffs(x) -> x ? cttz(x) + 1 : 0
3288 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3289
3290 llvm::Type *ArgType = ArgValue->getType();
3291 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3292
3293 llvm::Type *ResultType = ConvertType(E->getType());
3294 Value *Tmp =
3295 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3296 llvm::ConstantInt::get(ArgType, 1));
3297 Value *Zero = llvm::Constant::getNullValue(ArgType);
3298 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3299 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3300 if (Result->getType() != ResultType)
3301 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3302 "cast");
3303 return RValue::get(Result);
3304 }
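// Worked example (illustrative): ffs(8) -> cttz(8) + 1 == 4, the 1-based index
// of the lowest set bit, while ffs(0) takes the select's zero arm and returns 0.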
3305 case Builtin::BI__builtin_parity:
3306 case Builtin::BI__builtin_parityl:
3307 case Builtin::BI__builtin_parityll: {
3308 // parity(x) -> ctpop(x) & 1
3309 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3310
3311 llvm::Type *ArgType = ArgValue->getType();
3312 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3313
3314 llvm::Type *ResultType = ConvertType(E->getType());
3315 Value *Tmp = Builder.CreateCall(F, ArgValue);
3316 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3317 if (Result->getType() != ResultType)
3318 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3319 "cast");
3320 return RValue::get(Result);
3321 }
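// Worked example (illustrative): parity(7) -> ctpop(7) & 1 == 3 & 1 == 1, and
// parity(3) -> 2 & 1 == 0.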
3322 case Builtin::BI__lzcnt16:
3323 case Builtin::BI__lzcnt:
3324 case Builtin::BI__lzcnt64: {
3325 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3326
3327 llvm::Type *ArgType = ArgValue->getType();
3328 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3329
3330 llvm::Type *ResultType = ConvertType(E->getType());
3331 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3332 if (Result->getType() != ResultType)
3333 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3334 "cast");
3335 return RValue::get(Result);
3336 }
3337 case Builtin::BI__popcnt16:
3338 case Builtin::BI__popcnt:
3339 case Builtin::BI__popcnt64:
3340 case Builtin::BI__builtin_popcount:
3341 case Builtin::BI__builtin_popcountl:
3342 case Builtin::BI__builtin_popcountll:
3343 case Builtin::BI__builtin_popcountg: {
3344 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3345
3346 llvm::Type *ArgType = ArgValue->getType();
3347 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3348
3349 llvm::Type *ResultType = ConvertType(E->getType());
3350 Value *Result = Builder.CreateCall(F, ArgValue);
3351 if (Result->getType() != ResultType)
3352 Result =
3353 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3354 return RValue::get(Result);
3355 }
3356 case Builtin::BI__builtin_unpredictable: {
3357 // Always return the argument of __builtin_unpredictable. LLVM does not
3358 // handle this builtin. Metadata for this builtin should be added directly
3359 // to instructions such as branches or switches that use it.
3360 return RValue::get(EmitScalarExpr(E->getArg(0)));
3361 }
3362 case Builtin::BI__builtin_expect: {
3363 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3364 llvm::Type *ArgType = ArgValue->getType();
3365
3366 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3367 // Don't generate llvm.expect on -O0 as the backend won't use it for
3368 // anything.
3369 // Note, we still IRGen ExpectedValue because it could have side-effects.
3370 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3371 return RValue::get(ArgValue);
3372
3373 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3374 Value *Result =
3375 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3376 return RValue::get(Result);
3377 }
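// For illustration (a sketch, not part of the original listing): at any
// non-zero optimization level, __builtin_expect(x, 1) on a long lowers roughly
// to
//   %expval = call i64 @llvm.expect.i64(i64 %x, i64 1)
// which later passes turn into branch weights; at -O0 the call is skipped and
// %x is returned directly, as handled above.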
3378 case Builtin::BI__builtin_expect_with_probability: {
3379 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3380 llvm::Type *ArgType = ArgValue->getType();
3381
3382 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3383 llvm::APFloat Probability(0.0);
3384 const Expr *ProbArg = E->getArg(2);
3385 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3386 assert(EvalSucceed && "probability should be able to evaluate as float");
3387 (void)EvalSucceed;
3388 bool LoseInfo = false;
3389 Probability.convert(llvm::APFloat::IEEEdouble(),
3390 llvm::RoundingMode::Dynamic, &LoseInfo);
3391 llvm::Type *Ty = ConvertType(ProbArg->getType());
3392 Constant *Confidence = ConstantFP::get(Ty, Probability);
3393 // Don't generate llvm.expect.with.probability on -O0 as the backend
3394 // won't use it for anything.
3395 // Note, we still IRGen ExpectedValue because it could have side-effects.
3396 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3397 return RValue::get(ArgValue);
3398
3399 Function *FnExpect =
3400 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3401 Value *Result = Builder.CreateCall(
3402 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3403 return RValue::get(Result);
3404 }
3405 case Builtin::BI__builtin_assume_aligned: {
3406 const Expr *Ptr = E->getArg(0);
3407 Value *PtrValue = EmitScalarExpr(Ptr);
3408 Value *OffsetValue =
3409 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3410
3411 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3412 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3413 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3414 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3415 llvm::Value::MaximumAlignment);
3416
3417 emitAlignmentAssumption(PtrValue, Ptr,
3418 /*The expr loc is sufficient.*/ SourceLocation(),
3419 AlignmentCI, OffsetValue);
3420 return RValue::get(PtrValue);
3421 }
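// For illustration (a sketch, not part of the original listing):
// __builtin_assume_aligned(p, 64) is expected to produce an alignment
// assumption of the form
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 64) ]
// with an optional third argument becoming the offset operand of the bundle.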
3422 case Builtin::BI__assume:
3423 case Builtin::BI__builtin_assume: {
3424 if (E->getArg(0)->HasSideEffects(getContext()))
3425 return RValue::get(nullptr);
3426
3427 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3428 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3429 Builder.CreateCall(FnAssume, ArgValue);
3430 return RValue::get(nullptr);
3431 }
3432 case Builtin::BI__builtin_assume_separate_storage: {
3433 const Expr *Arg0 = E->getArg(0);
3434 const Expr *Arg1 = E->getArg(1);
3435
3436 Value *Value0 = EmitScalarExpr(Arg0);
3437 Value *Value1 = EmitScalarExpr(Arg1);
3438
3439 Value *Values[] = {Value0, Value1};
3440 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3441 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3442 return RValue::get(nullptr);
3443 }
3444 case Builtin::BI__builtin_allow_runtime_check: {
3445 StringRef Kind =
3446 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3447 LLVMContext &Ctx = CGM.getLLVMContext();
3448 llvm::Value *Allow = Builder.CreateCall(
3449 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3450 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3451 return RValue::get(Allow);
3452 }
3453 case Builtin::BI__arithmetic_fence: {
3454 // Create the builtin call if FastMath is selected, and the target
3455 // supports the builtin, otherwise just return the argument.
3456 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3457 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3458 bool isArithmeticFenceEnabled =
3459 FMF.allowReassoc() &&
3460 getContext().getTargetInfo().checkArithmeticFenceSupported();
3461 QualType ArgType = E->getArg(0)->getType();
3462 if (ArgType->isComplexType()) {
3463 if (isArithmeticFenceEnabled) {
3464 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3465 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3466 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3467 ConvertType(ElementType));
3468 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3469 ConvertType(ElementType));
3470 return RValue::getComplex(std::make_pair(Real, Imag));
3471 }
3472 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3473 Value *Real = ComplexVal.first;
3474 Value *Imag = ComplexVal.second;
3475 return RValue::getComplex(std::make_pair(Real, Imag));
3476 }
3477 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3478 if (isArithmeticFenceEnabled)
3479 return RValue::get(
3480 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3481 return RValue::get(ArgValue);
3482 }
3483 case Builtin::BI__builtin_bswap16:
3484 case Builtin::BI__builtin_bswap32:
3485 case Builtin::BI__builtin_bswap64:
3486 case Builtin::BI_byteswap_ushort:
3487 case Builtin::BI_byteswap_ulong:
3488 case Builtin::BI_byteswap_uint64: {
3489 return RValue::get(
3490 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3491 }
3492 case Builtin::BI__builtin_bitreverse8:
3493 case Builtin::BI__builtin_bitreverse16:
3494 case Builtin::BI__builtin_bitreverse32:
3495 case Builtin::BI__builtin_bitreverse64: {
3496 return RValue::get(
3497 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3498 }
3499 case Builtin::BI__builtin_rotateleft8:
3500 case Builtin::BI__builtin_rotateleft16:
3501 case Builtin::BI__builtin_rotateleft32:
3502 case Builtin::BI__builtin_rotateleft64:
3503 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3504 case Builtin::BI_rotl16:
3505 case Builtin::BI_rotl:
3506 case Builtin::BI_lrotl:
3507 case Builtin::BI_rotl64:
3508 return emitRotate(E, false);
3509
3510 case Builtin::BI__builtin_rotateright8:
3511 case Builtin::BI__builtin_rotateright16:
3512 case Builtin::BI__builtin_rotateright32:
3513 case Builtin::BI__builtin_rotateright64:
3514 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3515 case Builtin::BI_rotr16:
3516 case Builtin::BI_rotr:
3517 case Builtin::BI_lrotr:
3518 case Builtin::BI_rotr64:
3519 return emitRotate(E, true);
3520
3521 case Builtin::BI__builtin_constant_p: {
3522 llvm::Type *ResultType = ConvertType(E->getType());
3523
3524 const Expr *Arg = E->getArg(0);
3525 QualType ArgType = Arg->getType();
3526 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3527 // and likely a mistake.
3528 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3529 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3530 // Per the GCC documentation, only numeric constants are recognized after
3531 // inlining.
3532 return RValue::get(ConstantInt::get(ResultType, 0));
3533
3534 if (Arg->HasSideEffects(getContext()))
3535 // The argument is unevaluated, so be conservative if it might have
3536 // side-effects.
3537 return RValue::get(ConstantInt::get(ResultType, 0));
3538
3539 Value *ArgValue = EmitScalarExpr(Arg);
3540 if (ArgType->isObjCObjectPointerType()) {
3541 // Convert Objective-C objects to id because we cannot distinguish between
3542 // LLVM types for Obj-C classes as they are opaque.
3543 ArgType = CGM.getContext().getObjCIdType();
3544 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3545 }
3546 Function *F =
3547 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3548 Value *Result = Builder.CreateCall(F, ArgValue);
3549 if (Result->getType() != ResultType)
3550 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3551 return RValue::get(Result);
3552 }
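// For illustration (a sketch, not part of the original listing):
// __builtin_constant_p(n) on an int is expected to become
//   %0 = call i1 @llvm.is.constant.i32(i32 %n)
// which the optimizer folds to true or false once inlining and constant
// folding have settled; the early returns above conservatively yield 0 for
// unsupported argument kinds and side-effecting expressions.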
3553 case Builtin::BI__builtin_dynamic_object_size:
3554 case Builtin::BI__builtin_object_size: {
3555 unsigned Type =
3556 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3557 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3558
3559 // We pass this builtin onto the optimizer so that it can figure out the
3560 // object size in more complex cases.
3561 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3562 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3563 /*EmittedE=*/nullptr, IsDynamic));
3564 }
3565 case Builtin::BI__builtin_prefetch: {
3566 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3567 // FIXME: Technically these constants should be of type 'int', yes?
3568 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3569 llvm::ConstantInt::get(Int32Ty, 0);
3570 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3571 llvm::ConstantInt::get(Int32Ty, 3);
3572 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3573 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3574 Builder.CreateCall(F, {Address, RW, Locality, Data});
3575 return RValue::get(nullptr);
3576 }
3577 case Builtin::BI__builtin_readcyclecounter: {
3578 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3579 return RValue::get(Builder.CreateCall(F));
3580 }
3581 case Builtin::BI__builtin_readsteadycounter: {
3582 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3583 return RValue::get(Builder.CreateCall(F));
3584 }
3585 case Builtin::BI__builtin___clear_cache: {
3586 Value *Begin = EmitScalarExpr(E->getArg(0));
3587 Value *End = EmitScalarExpr(E->getArg(1));
3588 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3589 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3590 }
3591 case Builtin::BI__builtin_trap:
3592 EmitTrapCall(Intrinsic::trap);
3593 return RValue::get(nullptr);
3594 case Builtin::BI__builtin_verbose_trap: {
3595 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3596 if (getDebugInfo()) {
3597 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3598 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3599 *E->getArg(1)->tryEvaluateString(getContext()));
3600 }
3601 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3602 // Currently no attempt is made to prevent traps from being merged.
3603 EmitTrapCall(Intrinsic::trap);
3604 return RValue::get(nullptr);
3605 }
3606 case Builtin::BI__debugbreak:
3607 EmitTrapCall(Intrinsic::debugtrap);
3608 return RValue::get(nullptr);
3609 case Builtin::BI__builtin_unreachable: {
3610 EmitUnreachable(E->getExprLoc());
3611
3612 // We do need to preserve an insertion point.
3613 EmitBlock(createBasicBlock("unreachable.cont"));
3614
3615 return RValue::get(nullptr);
3616 }
3617
3618 case Builtin::BI__builtin_powi:
3619 case Builtin::BI__builtin_powif:
3620 case Builtin::BI__builtin_powil: {
3621 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3622 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3623
3624 if (Builder.getIsFPConstrained()) {
3625 // FIXME: llvm.powi has 2 mangling types,
3626 // llvm.experimental.constrained.powi has one.
3627 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3628 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3629 Src0->getType());
3630 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3631 }
3632
3633 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3634 { Src0->getType(), Src1->getType() });
3635 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3636 }
3637 case Builtin::BI__builtin_frexpl: {
3638 // Linux PPC will not be adding additional PPCDoubleDouble support.
3639 // WIP to switch default to IEEE long double. Will emit libcall for
3640 // frexpl instead of legalizing this type in the BE.
3641 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3642 break;
3643 [[fallthrough]];
3644 }
3645 case Builtin::BI__builtin_frexp:
3646 case Builtin::BI__builtin_frexpf:
3647 case Builtin::BI__builtin_frexpf128:
3648 case Builtin::BI__builtin_frexpf16:
3649 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3650 case Builtin::BI__builtin_isgreater:
3651 case Builtin::BI__builtin_isgreaterequal:
3652 case Builtin::BI__builtin_isless:
3653 case Builtin::BI__builtin_islessequal:
3654 case Builtin::BI__builtin_islessgreater:
3655 case Builtin::BI__builtin_isunordered: {
3656 // Ordered comparisons: we know the arguments to these are matching scalar
3657 // floating point values.
3658 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3659 Value *LHS = EmitScalarExpr(E->getArg(0));
3660 Value *RHS = EmitScalarExpr(E->getArg(1));
3661
3662 switch (BuiltinID) {
3663 default: llvm_unreachable("Unknown ordered comparison");
3664 case Builtin::BI__builtin_isgreater:
3665 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3666 break;
3667 case Builtin::BI__builtin_isgreaterequal:
3668 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3669 break;
3670 case Builtin::BI__builtin_isless:
3671 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3672 break;
3673 case Builtin::BI__builtin_islessequal:
3674 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3675 break;
3676 case Builtin::BI__builtin_islessgreater:
3677 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3678 break;
3679 case Builtin::BI__builtin_isunordered:
3680 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3681 break;
3682 }
3683 // ZExt bool to int type.
3684 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3685 }
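// For illustration (a sketch, not part of the original listing):
// __builtin_isless(a, b) on doubles lowers roughly to
//   %cmp = fcmp olt double %a, %b
//   %res = zext i1 %cmp to i32
// and __builtin_isunordered uses the "uno" predicate, which is true iff either
// operand is a NaN.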
3686
3687 case Builtin::BI__builtin_isnan: {
3688 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3689 Value *V = EmitScalarExpr(E->getArg(0));
3690 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3691 return RValue::get(Result);
3692 return RValue::get(
3693 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3694 ConvertType(E->getType())));
3695 }
3696
3697 case Builtin::BI__builtin_issignaling: {
3698 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3699 Value *V = EmitScalarExpr(E->getArg(0));
3700 return RValue::get(
3701 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3702 ConvertType(E->getType())));
3703 }
3704
3705 case Builtin::BI__builtin_isinf: {
3706 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3707 Value *V = EmitScalarExpr(E->getArg(0));
3708 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3709 return RValue::get(Result);
3710 return RValue::get(
3711 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3712 ConvertType(E->getType())));
3713 }
3714
3715 case Builtin::BIfinite:
3716 case Builtin::BI__finite:
3717 case Builtin::BIfinitef:
3718 case Builtin::BI__finitef:
3719 case Builtin::BIfinitel:
3720 case Builtin::BI__finitel:
3721 case Builtin::BI__builtin_isfinite: {
3722 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3723 Value *V = EmitScalarExpr(E->getArg(0));
3724 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3725 return RValue::get(Result);
3726 return RValue::get(
3727 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3728 ConvertType(E->getType())));
3729 }
3730
3731 case Builtin::BI__builtin_isnormal: {
3732 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3733 Value *V = EmitScalarExpr(E->getArg(0));
3734 return RValue::get(
3735 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3736 ConvertType(E->getType())));
3737 }
3738
3739 case Builtin::BI__builtin_issubnormal: {
3740 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3741 Value *V = EmitScalarExpr(E->getArg(0));
3742 return RValue::get(
3743 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3744 ConvertType(E->getType())));
3745 }
3746
3747 case Builtin::BI__builtin_iszero: {
3748 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3749 Value *V = EmitScalarExpr(E->getArg(0));
3750 return RValue::get(
3751 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3752 ConvertType(E->getType())));
3753 }
3754
3755 case Builtin::BI__builtin_isfpclass: {
3756 Expr::EvalResult Result;
3757 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3758 break;
3759 uint64_t Test = Result.Val.getInt().getLimitedValue();
3760 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3761 Value *V = EmitScalarExpr(E->getArg(0));
3762 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3763 ConvertType(E->getType())));
3764 }
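// For illustration (a sketch, not part of the original listing):
// __builtin_isfpclass(x, mask) with a constant mask lowers to
//   %0 = call i1 @llvm.is.fpclass.f32(float %x, i32 mask)
// and the isnan/isinf/isfinite cases above build the same llvm.is.fpclass call
// with the fcNan, fcInf and fcFinite masks respectively.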
3765
3766 case Builtin::BI__builtin_nondeterministic_value: {
3767 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3768
3769 Value *Result = PoisonValue::get(Ty);
3770 Result = Builder.CreateFreeze(Result);
3771
3772 return RValue::get(Result);
3773 }
3774
3775 case Builtin::BI__builtin_elementwise_abs: {
3776 Value *Result;
3777 QualType QT = E->getArg(0)->getType();
3778
3779 if (auto *VecTy = QT->getAs<VectorType>())
3780 QT = VecTy->getElementType();
3781 if (QT->isIntegerType())
3782 Result = Builder.CreateBinaryIntrinsic(
3783 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3784 Builder.getFalse(), nullptr, "elt.abs");
3785 else
3786 Result = emitBuiltinWithOneOverloadedType<1>(
3787 *this, E, llvm::Intrinsic::fabs, "elt.abs");
3788
3789 return RValue::get(Result);
3790 }
3791 case Builtin::BI__builtin_elementwise_acos:
3792 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3793 *this, E, llvm::Intrinsic::acos, "elt.acos"));
3794 case Builtin::BI__builtin_elementwise_asin:
3795 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3796 *this, E, llvm::Intrinsic::asin, "elt.asin"));
3797 case Builtin::BI__builtin_elementwise_atan:
3798 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3799 *this, E, llvm::Intrinsic::atan, "elt.atan"));
3800 case Builtin::BI__builtin_elementwise_ceil:
3801 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3802 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3803 case Builtin::BI__builtin_elementwise_exp:
3804 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3805 *this, E, llvm::Intrinsic::exp, "elt.exp"));
3806 case Builtin::BI__builtin_elementwise_exp2:
3807 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3808 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3809 case Builtin::BI__builtin_elementwise_log:
3810 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3811 *this, E, llvm::Intrinsic::log, "elt.log"));
3812 case Builtin::BI__builtin_elementwise_log2:
3813 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3814 *this, E, llvm::Intrinsic::log2, "elt.log2"));
3815 case Builtin::BI__builtin_elementwise_log10:
3816 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3817 *this, E, llvm::Intrinsic::log10, "elt.log10"));
3818 case Builtin::BI__builtin_elementwise_pow: {
3819 return RValue::get(
3820 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
3821 }
3822 case Builtin::BI__builtin_elementwise_bitreverse:
3823 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3824 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
3825 case Builtin::BI__builtin_elementwise_cos:
3826 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3827 *this, E, llvm::Intrinsic::cos, "elt.cos"));
3828 case Builtin::BI__builtin_elementwise_cosh:
3829 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3830 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
3831 case Builtin::BI__builtin_elementwise_floor:
3832 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3833 *this, E, llvm::Intrinsic::floor, "elt.floor"));
3834 case Builtin::BI__builtin_elementwise_roundeven:
3835 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3836 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
3837 case Builtin::BI__builtin_elementwise_round:
3838 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3839 *this, E, llvm::Intrinsic::round, "elt.round"));
3840 case Builtin::BI__builtin_elementwise_rint:
3841 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3842 *this, E, llvm::Intrinsic::rint, "elt.rint"));
3843 case Builtin::BI__builtin_elementwise_nearbyint:
3844 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3845 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
3846 case Builtin::BI__builtin_elementwise_sin:
3847 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3848 *this, E, llvm::Intrinsic::sin, "elt.sin"));
3849 case Builtin::BI__builtin_elementwise_sinh:
3850 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3851 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
3852 case Builtin::BI__builtin_elementwise_tan:
3853 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3854 *this, E, llvm::Intrinsic::tan, "elt.tan"));
3855 case Builtin::BI__builtin_elementwise_tanh:
3856 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3857 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
3858 case Builtin::BI__builtin_elementwise_trunc:
3859 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3860 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3861 case Builtin::BI__builtin_elementwise_canonicalize:
3862 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3863 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3864 case Builtin::BI__builtin_elementwise_copysign:
3865 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
3866 *this, E, llvm::Intrinsic::copysign));
3867 case Builtin::BI__builtin_elementwise_fma:
3868 return RValue::get(
3869 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
3870 case Builtin::BI__builtin_elementwise_add_sat:
3871 case Builtin::BI__builtin_elementwise_sub_sat: {
3872 Value *Op0 = EmitScalarExpr(E->getArg(0));
3873 Value *Op1 = EmitScalarExpr(E->getArg(1));
3874 Value *Result;
3875 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3876 QualType Ty = E->getArg(0)->getType();
3877 if (auto *VecTy = Ty->getAs<VectorType>())
3878 Ty = VecTy->getElementType();
3879 bool IsSigned = Ty->isSignedIntegerType();
3880 unsigned Opc;
3881 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3882 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3883 else
3884 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3885 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3886 return RValue::get(Result);
3887 }
3888
3889 case Builtin::BI__builtin_elementwise_max: {
3890 Value *Op0 = EmitScalarExpr(E->getArg(0));
3891 Value *Op1 = EmitScalarExpr(E->getArg(1));
3892 Value *Result;
3893 if (Op0->getType()->isIntOrIntVectorTy()) {
3894 QualType Ty = E->getArg(0)->getType();
3895 if (auto *VecTy = Ty->getAs<VectorType>())
3896 Ty = VecTy->getElementType();
3897 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3898 ? llvm::Intrinsic::smax
3899 : llvm::Intrinsic::umax,
3900 Op0, Op1, nullptr, "elt.max");
3901 } else
3902 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3903 return RValue::get(Result);
3904 }
3905 case Builtin::BI__builtin_elementwise_min: {
3906 Value *Op0 = EmitScalarExpr(E->getArg(0));
3907 Value *Op1 = EmitScalarExpr(E->getArg(1));
3908 Value *Result;
3909 if (Op0->getType()->isIntOrIntVectorTy()) {
3910 QualType Ty = E->getArg(0)->getType();
3911 if (auto *VecTy = Ty->getAs<VectorType>())
3912 Ty = VecTy->getElementType();
3913 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3914 ? llvm::Intrinsic::smin
3915 : llvm::Intrinsic::umin,
3916 Op0, Op1, nullptr, "elt.min");
3917 } else
3918 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3919 return RValue::get(Result);
3920 }
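// For illustration (a sketch, not part of the original listing):
// __builtin_elementwise_max on a vector of four signed ints is expected to
// lower to @llvm.smax.v4i32, on unsigned ints to @llvm.umax.v4i32, and on
// floats to a maxnum call; the _min case mirrors this with smin/umin/minnum.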
3921
3922 case Builtin::BI__builtin_reduce_max: {
3923 auto GetIntrinsicID = [this](QualType QT) {
3924 if (auto *VecTy = QT->getAs<VectorType>())
3925 QT = VecTy->getElementType();
3926 else if (QT->isSizelessVectorType())
3927 QT = QT->getSizelessVectorEltType(CGM.getContext());
3928
3929 if (QT->isSignedIntegerType())
3930 return llvm::Intrinsic::vector_reduce_smax;
3931 if (QT->isUnsignedIntegerType())
3932 return llvm::Intrinsic::vector_reduce_umax;
3933 assert(QT->isFloatingType() && "must have a float here");
3934 return llvm::Intrinsic::vector_reduce_fmax;
3935 };
3936 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3937 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3938 }
3939
3940 case Builtin::BI__builtin_reduce_min: {
3941 auto GetIntrinsicID = [this](QualType QT) {
3942 if (auto *VecTy = QT->getAs<VectorType>())
3943 QT = VecTy->getElementType();
3944 else if (QT->isSizelessVectorType())
3945 QT = QT->getSizelessVectorEltType(CGM.getContext());
3946
3947 if (QT->isSignedIntegerType())
3948 return llvm::Intrinsic::vector_reduce_smin;
3949 if (QT->isUnsignedIntegerType())
3950 return llvm::Intrinsic::vector_reduce_umin;
3951 assert(QT->isFloatingType() && "must have a float here");
3952 return llvm::Intrinsic::vector_reduce_fmin;
3953 };
3954
3955 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3956 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3957 }
3958
3959 case Builtin::BI__builtin_reduce_add:
3960 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3961 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3962 case Builtin::BI__builtin_reduce_mul:
3963 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3964 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3965 case Builtin::BI__builtin_reduce_xor:
3966 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3967 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3968 case Builtin::BI__builtin_reduce_or:
3969 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3970 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3971 case Builtin::BI__builtin_reduce_and:
3972 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3973 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3974
3975 case Builtin::BI__builtin_matrix_transpose: {
3976 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3977 Value *MatValue = EmitScalarExpr(E->getArg(0));
3978 MatrixBuilder MB(Builder);
3979 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3980 MatrixTy->getNumColumns());
3981 return RValue::get(Result);
3982 }
3983
3984 case Builtin::BI__builtin_matrix_column_major_load: {
3985 MatrixBuilder MB(Builder);
3986 // Emit everything that isn't dependent on the first parameter type
3987 Value *Stride = EmitScalarExpr(E->getArg(3));
3988 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3989 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3990 assert(PtrTy && "arg0 must be of pointer type");
3991 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3992
3993 Address Src = EmitPointerWithAlignment(E->getArg(0));
3994 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
3995 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
3996 0);
3997 Value *Result = MB.CreateColumnMajorLoad(
3998 Src.getElementType(), Src.emitRawPointer(*this),
3999 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4000 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4001 return RValue::get(Result);
4002 }
4003
4004 case Builtin::BI__builtin_matrix_column_major_store: {
4005 MatrixBuilder MB(Builder);
4006 Value *Matrix = EmitScalarExpr(E->getArg(0));
4007 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4008 Value *Stride = EmitScalarExpr(E->getArg(2));
4009
4010 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4011 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4012 assert(PtrTy && "arg1 must be of pointer type");
4013 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4014
4015 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4016 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4017 0);
4018 Value *Result = MB.CreateColumnMajorStore(
4019 Matrix, Dst.emitRawPointer(*this),
4020 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4021 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4022 return RValue::get(Result);
4023 }
4024
4025 case Builtin::BI__builtin_isinf_sign: {
4026 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4027 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4028 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4029 Value *Arg = EmitScalarExpr(E->getArg(0));
4030 Value *AbsArg = EmitFAbs(*this, Arg);
4031 Value *IsInf = Builder.CreateFCmpOEQ(
4032 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4033 Value *IsNeg = EmitSignBit(*this, Arg);
4034
4035 llvm::Type *IntTy = ConvertType(E->getType());
4036 Value *Zero = Constant::getNullValue(IntTy);
4037 Value *One = ConstantInt::get(IntTy, 1);
4038 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4039 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4040 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4041 return RValue::get(Result);
4042 }
4043
4044 case Builtin::BI__builtin_flt_rounds: {
4045 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4046
4047 llvm::Type *ResultType = ConvertType(E->getType());
4048 Value *Result = Builder.CreateCall(F);
4049 if (Result->getType() != ResultType)
4050 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4051 "cast");
4052 return RValue::get(Result);
4053 }
4054
4055 case Builtin::BI__builtin_set_flt_rounds: {
4056 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4057
4058 Value *V = EmitScalarExpr(E->getArg(0));
4059 Builder.CreateCall(F, V);
4060 return RValue::get(nullptr);
4061 }
4062
4063 case Builtin::BI__builtin_fpclassify: {
4064 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4065 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4066 Value *V = EmitScalarExpr(E->getArg(5));
4067 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4068
4069 // Create Result
4070 BasicBlock *Begin = Builder.GetInsertBlock();
4071 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4072 Builder.SetInsertPoint(End);
4073 PHINode *Result =
4074 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4075 "fpclassify_result");
4076
4077 // if (V==0) return FP_ZERO
4078 Builder.SetInsertPoint(Begin);
4079 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4080 "iszero");
4081 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4082 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4083 Builder.CreateCondBr(IsZero, End, NotZero);
4084 Result->addIncoming(ZeroLiteral, Begin);
4085
4086 // if (V != V) return FP_NAN
4087 Builder.SetInsertPoint(NotZero);
4088 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4089 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4090 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4091 Builder.CreateCondBr(IsNan, End, NotNan);
4092 Result->addIncoming(NanLiteral, NotZero);
4093
4094 // if (fabs(V) == infinity) return FP_INFINITY
4095 Builder.SetInsertPoint(NotNan);
4096 Value *VAbs = EmitFAbs(*this, V);
4097 Value *IsInf =
4098 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4099 "isinf");
4100 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4101 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4102 Builder.CreateCondBr(IsInf, End, NotInf);
4103 Result->addIncoming(InfLiteral, NotNan);
4104
4105 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4106 Builder.SetInsertPoint(NotInf);
4107 APFloat Smallest = APFloat::getSmallestNormalized(
4108 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4109 Value *IsNormal =
4110 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4111 "isnormal");
4112 Value *NormalResult =
4113 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4114 EmitScalarExpr(E->getArg(3)));
4115 Builder.CreateBr(End);
4116 Result->addIncoming(NormalResult, NotInf);
4117
4118 // return Result
4119 Builder.SetInsertPoint(End);
4120 return RValue::get(Result);
4121 }
4122
4123 // An alloca will always return a pointer to the alloca (stack) address
4124 // space. This address space need not be the same as the AST / Language
4125 // default (e.g. in C / C++ auto vars are in the generic address space). At
4126 // the AST level this is handled within CreateTempAlloca et al., but for the
4127 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4128 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4129 case Builtin::BIalloca:
4130 case Builtin::BI_alloca:
4131 case Builtin::BI__builtin_alloca_uninitialized:
4132 case Builtin::BI__builtin_alloca: {
4133 Value *Size = EmitScalarExpr(E->getArg(0));
4134 const TargetInfo &TI = getContext().getTargetInfo();
4135 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4136 const Align SuitableAlignmentInBytes =
4137 CGM.getContext()
4138 .toCharUnitsFromBits(TI.getSuitableAlign())
4139 .getAsAlign();
4140 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4141 AI->setAlignment(SuitableAlignmentInBytes);
4142 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4143 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4144 LangAS AAS = getASTAllocaAddressSpace();
4145 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4146 if (AAS != EAS) {
4147 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4148 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4149 EAS, Ty));
4150 }
4151 return RValue::get(AI);
4152 }
4153
4154 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4155 case Builtin::BI__builtin_alloca_with_align: {
4156 Value *Size = EmitScalarExpr(E->getArg(0));
4157 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4158 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4159 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4160 const Align AlignmentInBytes =
4161 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4162 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4163 AI->setAlignment(AlignmentInBytes);
4164 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4165 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4166 LangAS AAS = getASTAllocaAddressSpace();
4167 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4168 if (AAS != EAS) {
4169 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4170 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4171 EAS, Ty));
4172 }
4173 return RValue::get(AI);
4174 }
4175
4176 case Builtin::BIbzero:
4177 case Builtin::BI__builtin_bzero: {
4178 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4179 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4180 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4181 E->getArg(0)->getExprLoc(), FD, 0);
4182 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4183 return RValue::get(nullptr);
4184 }
4185
4186 case Builtin::BIbcopy:
4187 case Builtin::BI__builtin_bcopy: {
4188 Address Src = EmitPointerWithAlignment(E->getArg(0));
4189 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4190 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4191 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4192 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4193 0);
4194 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4195 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4196 0);
4197 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4198 return RValue::get(nullptr);
4199 }
4200
4201 case Builtin::BImemcpy:
4202 case Builtin::BI__builtin_memcpy:
4203 case Builtin::BImempcpy:
4204 case Builtin::BI__builtin_mempcpy: {
4205 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4206 Address Src = EmitPointerWithAlignment(E->getArg(1));
4207 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4208 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4209 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4210 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4211 if (BuiltinID == Builtin::BImempcpy ||
4212 BuiltinID == Builtin::BI__builtin_mempcpy)
4213 return RValue::get(Builder.CreateInBoundsGEP(
4214 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4215 else
4216 return RValue::get(Dest, *this);
4217 }
4218
4219 case Builtin::BI__builtin_memcpy_inline: {
4220 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4221 Address Src = EmitPointerWithAlignment(E->getArg(1));
4222 uint64_t Size =
4223 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4224 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4225 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4226 Builder.CreateMemCpyInline(Dest, Src, Size);
4227 return RValue::get(nullptr);
4228 }
4229
4230 case Builtin::BI__builtin_char_memchr:
4231 BuiltinID = Builtin::BI__builtin_memchr;
4232 break;
4233
4234 case Builtin::BI__builtin___memcpy_chk: {
4235 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4236 Expr::EvalResult SizeResult, DstSizeResult;
4237 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4238 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4239 break;
4240 llvm::APSInt Size = SizeResult.Val.getInt();
4241 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4242 if (Size.ugt(DstSize))
4243 break;
4244 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4245 Address Src = EmitPointerWithAlignment(E->getArg(1));
4246 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4247 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4248 return RValue::get(Dest, *this);
4249 }
4250
4251 case Builtin::BI__builtin_objc_memmove_collectable: {
4252 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4253 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4254 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4255 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4256 DestAddr, SrcAddr, SizeVal);
4257 return RValue::get(DestAddr, *this);
4258 }
4259
4260 case Builtin::BI__builtin___memmove_chk: {
4261 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4262 Expr::EvalResult SizeResult, DstSizeResult;
4263 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4264 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4265 break;
4266 llvm::APSInt Size = SizeResult.Val.getInt();
4267 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4268 if (Size.ugt(DstSize))
4269 break;
4270 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4271 Address Src = EmitPointerWithAlignment(E->getArg(1));
4272 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4273 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4274 return RValue::get(Dest, *this);
4275 }
4276
4277 case Builtin::BImemmove:
4278 case Builtin::BI__builtin_memmove: {
4279 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4280 Address Src = EmitPointerWithAlignment(E->getArg(1));
4281 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4282 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4283 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4284 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4285 return RValue::get(Dest, *this);
4286 }
4287 case Builtin::BImemset:
4288 case Builtin::BI__builtin_memset: {
4289 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4290 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4291 Builder.getInt8Ty());
4292 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4293 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4294 E->getArg(0)->getExprLoc(), FD, 0);
4295 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4296 return RValue::get(Dest, *this);
4297 }
4298 case Builtin::BI__builtin_memset_inline: {
4299 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4300 Value *ByteVal =
4301 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4302 uint64_t Size =
4303 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4304 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4305 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4306 0);
4307 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4308 return RValue::get(nullptr);
4309 }
4310 case Builtin::BI__builtin___memset_chk: {
4311 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4312 Expr::EvalResult SizeResult, DstSizeResult;
4313 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4314 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4315 break;
4316 llvm::APSInt Size = SizeResult.Val.getInt();
4317 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4318 if (Size.ugt(DstSize))
4319 break;
4320 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4321 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4322 Builder.getInt8Ty());
4323 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4324 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4325 return RValue::get(Dest, *this);
4326 }
4327 case Builtin::BI__builtin_wmemchr: {
4328 // The MSVC runtime library does not provide a definition of wmemchr, so we
4329 // need an inline implementation.
4330 if (!getTarget().getTriple().isOSMSVCRT())
4331 break;
4332
4333 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4334 Value *Str = EmitScalarExpr(E->getArg(0));
4335 Value *Chr = EmitScalarExpr(E->getArg(1));
4336 Value *Size = EmitScalarExpr(E->getArg(2));
4337
4338 BasicBlock *Entry = Builder.GetInsertBlock();
4339 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4340 BasicBlock *Next = createBasicBlock("wmemchr.next");
4341 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4342 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4343 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4344
4345 EmitBlock(CmpEq);
4346 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4347 StrPhi->addIncoming(Str, Entry);
4348 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4349 SizePhi->addIncoming(Size, Entry);
4350 CharUnits WCharAlign =
4351 getContext().getTypeAlignInChars(getContext().WCharTy);
4352 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4353 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4354 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4355 Builder.CreateCondBr(StrEqChr, Exit, Next);
4356
4357 EmitBlock(Next);
4358 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4359 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4360 Value *NextSizeEq0 =
4361 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4362 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4363 StrPhi->addIncoming(NextStr, Next);
4364 SizePhi->addIncoming(NextSize, Next);
4365
4366 EmitBlock(Exit);
4367 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4368 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4369 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4370 Ret->addIncoming(FoundChr, CmpEq);
4371 return RValue::get(Ret);
4372 }
4373 case Builtin::BI__builtin_wmemcmp: {
4374 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4375 // need an inline implementation.
4376 if (!getTarget().getTriple().isOSMSVCRT())
4377 break;
4378
4379 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4380
4381 Value *Dst = EmitScalarExpr(E->getArg(0));
4382 Value *Src = EmitScalarExpr(E->getArg(1));
4383 Value *Size = EmitScalarExpr(E->getArg(2));
4384
4385 BasicBlock *Entry = Builder.GetInsertBlock();
4386 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4387 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4388 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4389 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4390 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4391 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4392
4393 EmitBlock(CmpGT);
4394 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4395 DstPhi->addIncoming(Dst, Entry);
4396 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4397 SrcPhi->addIncoming(Src, Entry);
4398 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4399 SizePhi->addIncoming(Size, Entry);
4400 CharUnits WCharAlign =
4401 getContext().getTypeAlignInChars(getContext().WCharTy);
4402 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4403 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4404 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4405 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4406
4407 EmitBlock(CmpLT);
4408 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4409 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4410
4411 EmitBlock(Next);
4412 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4413 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4414 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4415 Value *NextSizeEq0 =
4416 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4417 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4418 DstPhi->addIncoming(NextDst, Next);
4419 SrcPhi->addIncoming(NextSrc, Next);
4420 SizePhi->addIncoming(NextSize, Next);
4421
4422 EmitBlock(Exit);
4423 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4424 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4425 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4426 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4427 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4428 return RValue::get(Ret);
4429 }
4430 case Builtin::BI__builtin_dwarf_cfa: {
4431 // The offset in bytes from the first argument to the CFA.
4432 //
4433 // Why on earth is this in the frontend? Is there any reason at
4434 // all that the backend can't reasonably determine this while
4435 // lowering llvm.eh.dwarf.cfa()?
4436 //
4437 // TODO: If there's a satisfactory reason, add a target hook for
4438 // this instead of hard-coding 0, which is correct for most targets.
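// As a sketch of the typical output (illustrative only), a call to
// __builtin_dwarf_cfa() on such a target lowers to:
//   %cfa = call ptr @llvm.eh.dwarf.cfa(i32 0)
// with the hard-coded offset passed as the sole i32 operand.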
4439 int32_t Offset = 0;
4440
4441 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4442 return RValue::get(Builder.CreateCall(F,
4443 llvm::ConstantInt::get(Int32Ty, Offset)));
4444 }
4445 case Builtin::BI__builtin_return_address: {
4446 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4447 getContext().UnsignedIntTy);
4448 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4449 return RValue::get(Builder.CreateCall(F, Depth));
4450 }
4451 case Builtin::BI_ReturnAddress: {
4452 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4453 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4454 }
4455 case Builtin::BI__builtin_frame_address: {
4456 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4457 getContext().UnsignedIntTy);
4458 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4459 return RValue::get(Builder.CreateCall(F, Depth));
4460 }
4461 case Builtin::BI__builtin_extract_return_addr: {
4462 Value *Address = EmitScalarExpr(E->getArg(0));
4463 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4464 return RValue::get(Result);
4465 }
4466 case Builtin::BI__builtin_frob_return_addr: {
4467 Value *Address = EmitScalarExpr(E->getArg(0));
4468 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4469 return RValue::get(Result);
4470 }
4471 case Builtin::BI__builtin_dwarf_sp_column: {
4472 llvm::IntegerType *Ty
4473 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4474 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4475 if (Column == -1) {
4476 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4477 return RValue::get(llvm::UndefValue::get(Ty));
4478 }
4479 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4480 }
4481 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4482 Value *Address = EmitScalarExpr(E->getArg(0));
4483 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4484 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4485 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4486 }
4487 case Builtin::BI__builtin_eh_return: {
4488 Value *Int = EmitScalarExpr(E->getArg(0));
4489 Value *Ptr = EmitScalarExpr(E->getArg(1));
4490
4491 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4492 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4493 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4494 Function *F =
4495 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4496 : Intrinsic::eh_return_i64);
4497 Builder.CreateCall(F, {Int, Ptr});
4498 Builder.CreateUnreachable();
4499
4500 // We do need to preserve an insertion point.
4501 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4502
4503 return RValue::get(nullptr);
4504 }
4505 case Builtin::BI__builtin_unwind_init: {
4506 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4507 Builder.CreateCall(F);
4508 return RValue::get(nullptr);
4509 }
4510 case Builtin::BI__builtin_extend_pointer: {
4511 // Extends a pointer to the size of an _Unwind_Word, which is
4512 // uint64_t on all platforms. Generally this gets poked into a
4513 // register and eventually used as an address, so if the
4514 // addressing registers are wider than pointers and the platform
4515 // doesn't implicitly ignore high-order bits when doing
4516 // addressing, we need to make sure we zext / sext based on
4517 // the platform's expectations.
4518 //
4519 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
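// As a sketch (assuming a 32-bit pointer target), __builtin_extend_pointer(p)
// is emitted roughly as:
//   %cast = ptrtoint ptr %p to i32
//   %ext  = zext i32 %cast to i64   ; or sext, if the target hook asks for it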
4520
4521 // Cast the pointer to intptr_t.
4522 Value *Ptr = EmitScalarExpr(E->getArg(0));
4523 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4524
4525 // If that's 64 bits, we're done.
4526 if (IntPtrTy->getBitWidth() == 64)
4527 return RValue::get(Result);
4528
4529 // Otherwise, ask the codegen data what to do.
4530 if (getTargetHooks().extendPointerWithSExt())
4531 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4532 else
4533 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4534 }
4535 case Builtin::BI__builtin_setjmp: {
4536 // Buffer is a void**.
4537 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4538
4539 // Store the frame pointer to the setjmp buffer.
4540 Value *FrameAddr = Builder.CreateCall(
4541 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4542 ConstantInt::get(Int32Ty, 0));
4543 Builder.CreateStore(FrameAddr, Buf);
4544
4545 // Store the stack pointer to the setjmp buffer.
4546 Value *StackAddr = Builder.CreateStackSave();
4547 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4548
4549 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4550 Builder.CreateStore(StackAddr, StackSaveSlot);
4551
4552 // Call LLVM's EH setjmp, which is lightweight.
4553 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4554 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4555 }
4556 case Builtin::BI__builtin_longjmp: {
4557 Value *Buf = EmitScalarExpr(E->getArg(0));
4558
4559 // Call LLVM's EH longjmp, which is lightweight.
4560 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4561
4562 // longjmp doesn't return; mark this as unreachable.
4563 Builder.CreateUnreachable();
4564
4565 // We do need to preserve an insertion point.
4566 EmitBlock(createBasicBlock("longjmp.cont"));
4567
4568 return RValue::get(nullptr);
4569 }
4570 case Builtin::BI__builtin_launder: {
4571 const Expr *Arg = E->getArg(0);
4572 QualType ArgTy = Arg->getType()->getPointeeType();
4573 Value *Ptr = EmitScalarExpr(Arg);
4574 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4575 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4576
4577 return RValue::get(Ptr);
4578 }
4579 case Builtin::BI__sync_fetch_and_add:
4580 case Builtin::BI__sync_fetch_and_sub:
4581 case Builtin::BI__sync_fetch_and_or:
4582 case Builtin::BI__sync_fetch_and_and:
4583 case Builtin::BI__sync_fetch_and_xor:
4584 case Builtin::BI__sync_fetch_and_nand:
4585 case Builtin::BI__sync_add_and_fetch:
4586 case Builtin::BI__sync_sub_and_fetch:
4587 case Builtin::BI__sync_and_and_fetch:
4588 case Builtin::BI__sync_or_and_fetch:
4589 case Builtin::BI__sync_xor_and_fetch:
4590 case Builtin::BI__sync_nand_and_fetch:
4591 case Builtin::BI__sync_val_compare_and_swap:
4592 case Builtin::BI__sync_bool_compare_and_swap:
4593 case Builtin::BI__sync_lock_test_and_set:
4594 case Builtin::BI__sync_lock_release:
4595 case Builtin::BI__sync_swap:
4596 llvm_unreachable("Shouldn't make it through sema");
4597 case Builtin::BI__sync_fetch_and_add_1:
4598 case Builtin::BI__sync_fetch_and_add_2:
4599 case Builtin::BI__sync_fetch_and_add_4:
4600 case Builtin::BI__sync_fetch_and_add_8:
4601 case Builtin::BI__sync_fetch_and_add_16:
4602 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4603 case Builtin::BI__sync_fetch_and_sub_1:
4604 case Builtin::BI__sync_fetch_and_sub_2:
4605 case Builtin::BI__sync_fetch_and_sub_4:
4606 case Builtin::BI__sync_fetch_and_sub_8:
4607 case Builtin::BI__sync_fetch_and_sub_16:
4608 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4609 case Builtin::BI__sync_fetch_and_or_1:
4610 case Builtin::BI__sync_fetch_and_or_2:
4611 case Builtin::BI__sync_fetch_and_or_4:
4612 case Builtin::BI__sync_fetch_and_or_8:
4613 case Builtin::BI__sync_fetch_and_or_16:
4614 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4615 case Builtin::BI__sync_fetch_and_and_1:
4616 case Builtin::BI__sync_fetch_and_and_2:
4617 case Builtin::BI__sync_fetch_and_and_4:
4618 case Builtin::BI__sync_fetch_and_and_8:
4619 case Builtin::BI__sync_fetch_and_and_16:
4620 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4621 case Builtin::BI__sync_fetch_and_xor_1:
4622 case Builtin::BI__sync_fetch_and_xor_2:
4623 case Builtin::BI__sync_fetch_and_xor_4:
4624 case Builtin::BI__sync_fetch_and_xor_8:
4625 case Builtin::BI__sync_fetch_and_xor_16:
4626 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4627 case Builtin::BI__sync_fetch_and_nand_1:
4628 case Builtin::BI__sync_fetch_and_nand_2:
4629 case Builtin::BI__sync_fetch_and_nand_4:
4630 case Builtin::BI__sync_fetch_and_nand_8:
4631 case Builtin::BI__sync_fetch_and_nand_16:
4632 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4633
4634 // Clang extensions: not overloaded yet.
4635 case Builtin::BI__sync_fetch_and_min:
4636 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4637 case Builtin::BI__sync_fetch_and_max:
4638 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4639 case Builtin::BI__sync_fetch_and_umin:
4640 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4641 case Builtin::BI__sync_fetch_and_umax:
4642 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4643
4644 case Builtin::BI__sync_add_and_fetch_1:
4645 case Builtin::BI__sync_add_and_fetch_2:
4646 case Builtin::BI__sync_add_and_fetch_4:
4647 case Builtin::BI__sync_add_and_fetch_8:
4648 case Builtin::BI__sync_add_and_fetch_16:
4649 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4650 llvm::Instruction::Add);
4651 case Builtin::BI__sync_sub_and_fetch_1:
4652 case Builtin::BI__sync_sub_and_fetch_2:
4653 case Builtin::BI__sync_sub_and_fetch_4:
4654 case Builtin::BI__sync_sub_and_fetch_8:
4655 case Builtin::BI__sync_sub_and_fetch_16:
4656 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4657 llvm::Instruction::Sub);
4658 case Builtin::BI__sync_and_and_fetch_1:
4659 case Builtin::BI__sync_and_and_fetch_2:
4660 case Builtin::BI__sync_and_and_fetch_4:
4661 case Builtin::BI__sync_and_and_fetch_8:
4662 case Builtin::BI__sync_and_and_fetch_16:
4663 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4664 llvm::Instruction::And);
4665 case Builtin::BI__sync_or_and_fetch_1:
4666 case Builtin::BI__sync_or_and_fetch_2:
4667 case Builtin::BI__sync_or_and_fetch_4:
4668 case Builtin::BI__sync_or_and_fetch_8:
4669 case Builtin::BI__sync_or_and_fetch_16:
4670 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4671 llvm::Instruction::Or);
4672 case Builtin::BI__sync_xor_and_fetch_1:
4673 case Builtin::BI__sync_xor_and_fetch_2:
4674 case Builtin::BI__sync_xor_and_fetch_4:
4675 case Builtin::BI__sync_xor_and_fetch_8:
4676 case Builtin::BI__sync_xor_and_fetch_16:
4677 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4678 llvm::Instruction::Xor);
4679 case Builtin::BI__sync_nand_and_fetch_1:
4680 case Builtin::BI__sync_nand_and_fetch_2:
4681 case Builtin::BI__sync_nand_and_fetch_4:
4682 case Builtin::BI__sync_nand_and_fetch_8:
4683 case Builtin::BI__sync_nand_and_fetch_16:
4684 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4685 llvm::Instruction::And, true);
4686
4687 case Builtin::BI__sync_val_compare_and_swap_1:
4688 case Builtin::BI__sync_val_compare_and_swap_2:
4689 case Builtin::BI__sync_val_compare_and_swap_4:
4690 case Builtin::BI__sync_val_compare_and_swap_8:
4691 case Builtin::BI__sync_val_compare_and_swap_16:
4692 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4693
4694 case Builtin::BI__sync_bool_compare_and_swap_1:
4695 case Builtin::BI__sync_bool_compare_and_swap_2:
4696 case Builtin::BI__sync_bool_compare_and_swap_4:
4697 case Builtin::BI__sync_bool_compare_and_swap_8:
4698 case Builtin::BI__sync_bool_compare_and_swap_16:
4699 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4700
4701 case Builtin::BI__sync_swap_1:
4702 case Builtin::BI__sync_swap_2:
4703 case Builtin::BI__sync_swap_4:
4704 case Builtin::BI__sync_swap_8:
4705 case Builtin::BI__sync_swap_16:
4706 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4707
4708 case Builtin::BI__sync_lock_test_and_set_1:
4709 case Builtin::BI__sync_lock_test_and_set_2:
4710 case Builtin::BI__sync_lock_test_and_set_4:
4711 case Builtin::BI__sync_lock_test_and_set_8:
4712 case Builtin::BI__sync_lock_test_and_set_16:
4713 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4714
4715 case Builtin::BI__sync_lock_release_1:
4716 case Builtin::BI__sync_lock_release_2:
4717 case Builtin::BI__sync_lock_release_4:
4718 case Builtin::BI__sync_lock_release_8:
4719 case Builtin::BI__sync_lock_release_16: {
4720 Address Ptr = CheckAtomicAlignment(*this, E);
4721 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4722
4723 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4724 getContext().getTypeSize(ElTy));
4725 llvm::StoreInst *Store =
4726 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4727 Store->setAtomic(llvm::AtomicOrdering::Release);
4728 return RValue::get(nullptr);
4729 }
4730
4731 case Builtin::BI__sync_synchronize: {
4732 // We assume this is supposed to correspond to a C++0x-style
4733 // sequentially-consistent fence (i.e. this is only usable for
4734 // synchronization, not device I/O or anything like that). This intrinsic
4735 // is really badly designed in the sense that in theory, there isn't
4736 // any way to safely use it... but in practice, it mostly works
4737 // to use it with non-atomic loads and stores to get acquire/release
4738 // semantics.
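// Sketch of the resulting IR: a bare call such as
//   __sync_synchronize();
// is emitted as a single instruction:
//   fence seq_cst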
4739 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4740 return RValue::get(nullptr);
4741 }
4742
4743 case Builtin::BI__builtin_nontemporal_load:
4744 return RValue::get(EmitNontemporalLoad(*this, E));
4745 case Builtin::BI__builtin_nontemporal_store:
4746 return RValue::get(EmitNontemporalStore(*this, E));
4747 case Builtin::BI__c11_atomic_is_lock_free:
4748 case Builtin::BI__atomic_is_lock_free: {
4749 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4750 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4751 // _Atomic(T) is always properly-aligned.
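// Sketch (on a typical 64-bit target): __c11_atomic_is_lock_free(8) comes out
// roughly as a libcall
//   call i1 @__atomic_is_lock_free(i64 8, ptr null)
// while the __atomic_ form forwards the user-supplied object pointer instead
// of null.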
4752 const char *LibCallName = "__atomic_is_lock_free";
4753 CallArgList Args;
4754 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4755 getContext().getSizeType());
4756 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4757 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4758 getContext().VoidPtrTy);
4759 else
4760 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4761 getContext().VoidPtrTy);
4762 const CGFunctionInfo &FuncInfo =
4763 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4764 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4765 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4766 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4767 ReturnValueSlot(), Args);
4768 }
4769
4770 case Builtin::BI__atomic_test_and_set: {
4771 // Look at the argument type to determine whether this is a volatile
4772 // operation. The parameter type is always volatile.
4773 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4774 bool Volatile =
4775 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4776
4777 Address Ptr =
4778 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4779
4780 Value *NewVal = Builder.getInt8(1);
4781 Value *Order = EmitScalarExpr(E->getArg(1));
4782 if (isa<llvm::ConstantInt>(Order)) {
4783 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4784 AtomicRMWInst *Result = nullptr;
4785 switch (ord) {
4786 case 0: // memory_order_relaxed
4787 default: // invalid order
4788 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4789 llvm::AtomicOrdering::Monotonic);
4790 break;
4791 case 1: // memory_order_consume
4792 case 2: // memory_order_acquire
4793 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4794 llvm::AtomicOrdering::Acquire);
4795 break;
4796 case 3: // memory_order_release
4797 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4798 llvm::AtomicOrdering::Release);
4799 break;
4800 case 4: // memory_order_acq_rel
4801
4802 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4803 llvm::AtomicOrdering::AcquireRelease);
4804 break;
4805 case 5: // memory_order_seq_cst
4806 Result = Builder.CreateAtomicRMW(
4807 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4808 llvm::AtomicOrdering::SequentiallyConsistent);
4809 break;
4810 }
4811 Result->setVolatile(Volatile);
4812 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4813 }
4814
4815 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4816
4817 llvm::BasicBlock *BBs[5] = {
4818 createBasicBlock("monotonic", CurFn),
4819 createBasicBlock("acquire", CurFn),
4820 createBasicBlock("release", CurFn),
4821 createBasicBlock("acqrel", CurFn),
4822 createBasicBlock("seqcst", CurFn)
4823 };
4824 llvm::AtomicOrdering Orders[5] = {
4825 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4826 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4827 llvm::AtomicOrdering::SequentiallyConsistent};
4828
4829 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4830 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4831
4832 Builder.SetInsertPoint(ContBB);
4833 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4834
4835 for (unsigned i = 0; i < 5; ++i) {
4836 Builder.SetInsertPoint(BBs[i]);
4837 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4838 Ptr, NewVal, Orders[i]);
4839 RMW->setVolatile(Volatile);
4840 Result->addIncoming(RMW, BBs[i]);
4841 Builder.CreateBr(ContBB);
4842 }
4843
4844 SI->addCase(Builder.getInt32(0), BBs[0]);
4845 SI->addCase(Builder.getInt32(1), BBs[1]);
4846 SI->addCase(Builder.getInt32(2), BBs[1]);
4847 SI->addCase(Builder.getInt32(3), BBs[2]);
4848 SI->addCase(Builder.getInt32(4), BBs[3]);
4849 SI->addCase(Builder.getInt32(5), BBs[4]);
4850
4851 Builder.SetInsertPoint(ContBB);
4852 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4853 }
4854
4855 case Builtin::BI__atomic_clear: {
4856 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4857 bool Volatile =
4858 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4859
4860 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4861 Ptr = Ptr.withElementType(Int8Ty);
4862 Value *NewVal = Builder.getInt8(0);
4863 Value *Order = EmitScalarExpr(E->getArg(1));
4864 if (isa<llvm::ConstantInt>(Order)) {
4865 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4866 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4867 switch (ord) {
4868 case 0: // memory_order_relaxed
4869 default: // invalid order
4870 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4871 break;
4872 case 3: // memory_order_release
4873 Store->setOrdering(llvm::AtomicOrdering::Release);
4874 break;
4875 case 5: // memory_order_seq_cst
4876 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4877 break;
4878 }
4879 return RValue::get(nullptr);
4880 }
4881
4882 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4883
4884 llvm::BasicBlock *BBs[3] = {
4885 createBasicBlock("monotonic", CurFn),
4886 createBasicBlock("release", CurFn),
4887 createBasicBlock("seqcst", CurFn)
4888 };
4889 llvm::AtomicOrdering Orders[3] = {
4890 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4891 llvm::AtomicOrdering::SequentiallyConsistent};
4892
4893 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4894 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4895
4896 for (unsigned i = 0; i < 3; ++i) {
4897 Builder.SetInsertPoint(BBs[i]);
4898 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4899 Store->setOrdering(Orders[i]);
4900 Builder.CreateBr(ContBB);
4901 }
4902
4903 SI->addCase(Builder.getInt32(0), BBs[0]);
4904 SI->addCase(Builder.getInt32(3), BBs[1]);
4905 SI->addCase(Builder.getInt32(5), BBs[2]);
4906
4907 Builder.SetInsertPoint(ContBB);
4908 return RValue::get(nullptr);
4909 }
4910
4911 case Builtin::BI__atomic_thread_fence:
4912 case Builtin::BI__atomic_signal_fence:
4913 case Builtin::BI__c11_atomic_thread_fence:
4914 case Builtin::BI__c11_atomic_signal_fence: {
4915 llvm::SyncScope::ID SSID;
4916 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4917 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4918 SSID = llvm::SyncScope::SingleThread;
4919 else
4920 SSID = llvm::SyncScope::System;
4921 Value *Order = EmitScalarExpr(E->getArg(0));
4922 if (isa<llvm::ConstantInt>(Order)) {
4923 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4924 switch (ord) {
4925 case 0: // memory_order_relaxed
4926 default: // invalid order
4927 break;
4928 case 1: // memory_order_consume
4929 case 2: // memory_order_acquire
4930 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4931 break;
4932 case 3: // memory_order_release
4933 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4934 break;
4935 case 4: // memory_order_acq_rel
4936 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4937 break;
4938 case 5: // memory_order_seq_cst
4939 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4940 break;
4941 }
4942 return RValue::get(nullptr);
4943 }
4944
4945 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4946 AcquireBB = createBasicBlock("acquire", CurFn);
4947 ReleaseBB = createBasicBlock("release", CurFn);
4948 AcqRelBB = createBasicBlock("acqrel", CurFn);
4949 SeqCstBB = createBasicBlock("seqcst", CurFn);
4950 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4951
4952 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4953 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4954
4955 Builder.SetInsertPoint(AcquireBB);
4956 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4957 Builder.CreateBr(ContBB);
4958 SI->addCase(Builder.getInt32(1), AcquireBB);
4959 SI->addCase(Builder.getInt32(2), AcquireBB);
4960
4961 Builder.SetInsertPoint(ReleaseBB);
4962 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4963 Builder.CreateBr(ContBB);
4964 SI->addCase(Builder.getInt32(3), ReleaseBB);
4965
4966 Builder.SetInsertPoint(AcqRelBB);
4967 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4968 Builder.CreateBr(ContBB);
4969 SI->addCase(Builder.getInt32(4), AcqRelBB);
4970
4971 Builder.SetInsertPoint(SeqCstBB);
4972 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4973 Builder.CreateBr(ContBB);
4974 SI->addCase(Builder.getInt32(5), SeqCstBB);
4975
4976 Builder.SetInsertPoint(ContBB);
4977 return RValue::get(nullptr);
4978 }
4979
4980 case Builtin::BI__builtin_signbit:
4981 case Builtin::BI__builtin_signbitf:
4982 case Builtin::BI__builtin_signbitl: {
4983 return RValue::get(
4984 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4985 ConvertType(E->getType())));
4986 }
4987 case Builtin::BI__warn_memset_zero_len:
4988 return RValue::getIgnored();
4989 case Builtin::BI__annotation: {
4990 // Re-encode each wide string to UTF8 and make an MDString.
4991 SmallVector<Metadata *, 1> Strings;
4992 for (const Expr *Arg : E->arguments()) {
4993 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4994 assert(Str->getCharByteWidth() == 2);
4995 StringRef WideBytes = Str->getBytes();
4996 std::string StrUtf8;
4997 if (!convertUTF16ToUTF8String(
4998 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4999 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5000 continue;
5001 }
5002 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5003 }
5004
5005 // Build an MDTuple of MDStrings and emit the intrinsic call.
5006 llvm::Function *F =
5007 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5008 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5009 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5010 return RValue::getIgnored();
5011 }
5012 case Builtin::BI__builtin_annotation: {
5013 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5014 llvm::Function *F =
5015 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5016 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5017
5018 // Get the annotation string, go through casts. Sema requires this to be a
5019 // non-wide string literal, potentially casted, so the cast<> is safe.
5020 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5021 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5022 return RValue::get(
5023 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5024 }
5025 case Builtin::BI__builtin_addcb:
5026 case Builtin::BI__builtin_addcs:
5027 case Builtin::BI__builtin_addc:
5028 case Builtin::BI__builtin_addcl:
5029 case Builtin::BI__builtin_addcll:
5030 case Builtin::BI__builtin_subcb:
5031 case Builtin::BI__builtin_subcs:
5032 case Builtin::BI__builtin_subc:
5033 case Builtin::BI__builtin_subcl:
5034 case Builtin::BI__builtin_subcll: {
5035
5036 // We translate all of these builtins from expressions of the form:
5037 // int x = ..., y = ..., carryin = ..., carryout, result;
5038 // result = __builtin_addc(x, y, carryin, &carryout);
5039 //
5040 // to LLVM IR of the form:
5041 //
5042 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5043 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5044 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5045 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5046 // i32 %carryin)
5047 // %result = extractvalue {i32, i1} %tmp2, 0
5048 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5049 // %tmp3 = or i1 %carry1, %carry2
5050 // %tmp4 = zext i1 %tmp3 to i32
5051 // store i32 %tmp4, i32* %carryout
5052
5053 // Scalarize our inputs.
5054 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5055 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5056 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5057 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5058
5059 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5060 llvm::Intrinsic::ID IntrinsicId;
5061 switch (BuiltinID) {
5062 default: llvm_unreachable("Unknown multiprecision builtin id.");
5063 case Builtin::BI__builtin_addcb:
5064 case Builtin::BI__builtin_addcs:
5065 case Builtin::BI__builtin_addc:
5066 case Builtin::BI__builtin_addcl:
5067 case Builtin::BI__builtin_addcll:
5068 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5069 break;
5070 case Builtin::BI__builtin_subcb:
5071 case Builtin::BI__builtin_subcs:
5072 case Builtin::BI__builtin_subc:
5073 case Builtin::BI__builtin_subcl:
5074 case Builtin::BI__builtin_subcll:
5075 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5076 break;
5077 }
5078
5079 // Construct our resulting LLVM IR expression.
5080 llvm::Value *Carry1;
5081 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5082 X, Y, Carry1);
5083 llvm::Value *Carry2;
5084 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5085 Sum1, Carryin, Carry2);
5086 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5087 X->getType());
5088 Builder.CreateStore(CarryOut, CarryOutPtr);
5089 return RValue::get(Sum2);
5090 }
5091
5092 case Builtin::BI__builtin_add_overflow:
5093 case Builtin::BI__builtin_sub_overflow:
5094 case Builtin::BI__builtin_mul_overflow: {
5095 const clang::Expr *LeftArg = E->getArg(0);
5096 const clang::Expr *RightArg = E->getArg(1);
5097 const clang::Expr *ResultArg = E->getArg(2);
5098
5099 clang::QualType ResultQTy =
5100 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5101
5102 WidthAndSignedness LeftInfo =
5103 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5104 WidthAndSignedness RightInfo =
5105 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5106 WidthAndSignedness ResultInfo =
5107 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5108
5109 // Handle mixed-sign multiplication as a special case, because adding
5110 // runtime or backend support for our generic irgen would be too expensive.
5111 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5112 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5113 RightInfo, ResultArg, ResultQTy,
5114 ResultInfo);
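// (Illustration, not an exhaustive rule: a call such as
//   __builtin_mul_overflow((int)a, (unsigned)b, &int_result)
// has mixed-sign operands whose width covers the result, so it is handled by
// the helper above rather than by the generic encompassing-type path below.)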
5115
5116 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5117 ResultInfo))
5118 return EmitCheckedUnsignedMultiplySignedResult(
5119 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5120 ResultInfo);
5121
5122 WidthAndSignedness EncompassingInfo =
5123 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5124
5125 llvm::Type *EncompassingLLVMTy =
5126 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5127
5128 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5129
5130 llvm::Intrinsic::ID IntrinsicId;
5131 switch (BuiltinID) {
5132 default:
5133 llvm_unreachable("Unknown overflow builtin id.");
5134 case Builtin::BI__builtin_add_overflow:
5135 IntrinsicId = EncompassingInfo.Signed
5136 ? llvm::Intrinsic::sadd_with_overflow
5137 : llvm::Intrinsic::uadd_with_overflow;
5138 break;
5139 case Builtin::BI__builtin_sub_overflow:
5140 IntrinsicId = EncompassingInfo.Signed
5141 ? llvm::Intrinsic::ssub_with_overflow
5142 : llvm::Intrinsic::usub_with_overflow;
5143 break;
5144 case Builtin::BI__builtin_mul_overflow:
5145 IntrinsicId = EncompassingInfo.Signed
5146 ? llvm::Intrinsic::smul_with_overflow
5147 : llvm::Intrinsic::umul_with_overflow;
5148 break;
5149 }
5150
5151 llvm::Value *Left = EmitScalarExpr(LeftArg);
5152 llvm::Value *Right = EmitScalarExpr(RightArg);
5153 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5154
5155 // Extend each operand to the encompassing type.
5156 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5157 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5158
5159 // Perform the operation on the extended values.
5160 llvm::Value *Overflow, *Result;
5161 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5162
5163 if (EncompassingInfo.Width > ResultInfo.Width) {
5164 // The encompassing type is wider than the result type, so we need to
5165 // truncate it.
5166 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5167
5168 // To see if the truncation caused an overflow, we will extend
5169 // the result and then compare it to the original result.
5170 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5171 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5172 llvm::Value *TruncationOverflow =
5173 Builder.CreateICmpNE(Result, ResultTruncExt);
5174
5175 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5176 Result = ResultTrunc;
5177 }
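// Worked example (sketch): for __builtin_add_overflow((int)a, (int)b, &s)
// with a 16-bit short result s, the addition is performed as
// @llvm.sadd.with.overflow.i32, the sum is truncated to i16, sign-extended
// back to i32 and compared against the untruncated sum, and any mismatch is
// OR-ed into the overflow flag returned below.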
5178
5179 // Finally, store the result using the pointer.
5180 bool isVolatile =
5181 ResultArg->getType()->getPointeeType().isVolatileQualified();
5182 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5183
5184 return RValue::get(Overflow);
5185 }
5186
5187 case Builtin::BI__builtin_uadd_overflow:
5188 case Builtin::BI__builtin_uaddl_overflow:
5189 case Builtin::BI__builtin_uaddll_overflow:
5190 case Builtin::BI__builtin_usub_overflow:
5191 case Builtin::BI__builtin_usubl_overflow:
5192 case Builtin::BI__builtin_usubll_overflow:
5193 case Builtin::BI__builtin_umul_overflow:
5194 case Builtin::BI__builtin_umull_overflow:
5195 case Builtin::BI__builtin_umulll_overflow:
5196 case Builtin::BI__builtin_sadd_overflow:
5197 case Builtin::BI__builtin_saddl_overflow:
5198 case Builtin::BI__builtin_saddll_overflow:
5199 case Builtin::BI__builtin_ssub_overflow:
5200 case Builtin::BI__builtin_ssubl_overflow:
5201 case Builtin::BI__builtin_ssubll_overflow:
5202 case Builtin::BI__builtin_smul_overflow:
5203 case Builtin::BI__builtin_smull_overflow:
5204 case Builtin::BI__builtin_smulll_overflow: {
5205
5206 // We translate all of these builtins directly to the relevant llvm IR node.
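// For instance (sketch of the typical output), __builtin_sadd_overflow(x, y, &r)
// with int operands becomes
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// where the i32 element is stored through &r and the i1 element is returned.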
5207
5208 // Scalarize our inputs.
5209 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5210 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5211 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5212
5213 // Decide which of the overflow intrinsics we are lowering to:
5214 llvm::Intrinsic::ID IntrinsicId;
5215 switch (BuiltinID) {
5216 default: llvm_unreachable("Unknown overflow builtin id.");
5217 case Builtin::BI__builtin_uadd_overflow:
5218 case Builtin::BI__builtin_uaddl_overflow:
5219 case Builtin::BI__builtin_uaddll_overflow:
5220 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5221 break;
5222 case Builtin::BI__builtin_usub_overflow:
5223 case Builtin::BI__builtin_usubl_overflow:
5224 case Builtin::BI__builtin_usubll_overflow:
5225 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5226 break;
5227 case Builtin::BI__builtin_umul_overflow:
5228 case Builtin::BI__builtin_umull_overflow:
5229 case Builtin::BI__builtin_umulll_overflow:
5230 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5231 break;
5232 case Builtin::BI__builtin_sadd_overflow:
5233 case Builtin::BI__builtin_saddl_overflow:
5234 case Builtin::BI__builtin_saddll_overflow:
5235 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5236 break;
5237 case Builtin::BI__builtin_ssub_overflow:
5238 case Builtin::BI__builtin_ssubl_overflow:
5239 case Builtin::BI__builtin_ssubll_overflow:
5240 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5241 break;
5242 case Builtin::BI__builtin_smul_overflow:
5243 case Builtin::BI__builtin_smull_overflow:
5244 case Builtin::BI__builtin_smulll_overflow:
5245 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5246 break;
5247 }
5248
5249
5250 llvm::Value *Carry;
5251 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5252 Builder.CreateStore(Sum, SumOutPtr);
5253
5254 return RValue::get(Carry);
5255 }
5256 case Builtin::BIaddressof:
5257 case Builtin::BI__addressof:
5258 case Builtin::BI__builtin_addressof:
5259 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5260 case Builtin::BI__builtin_function_start:
5261 return RValue::get(CGM.GetFunctionStart(
5262 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5263 case Builtin::BI__builtin_operator_new:
5264 return EmitBuiltinNewDeleteCall(
5265 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5266 case Builtin::BI__builtin_operator_delete:
5267 EmitBuiltinNewDeleteCall(
5268 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5269 return RValue::get(nullptr);
5270
5271 case Builtin::BI__builtin_is_aligned:
5272 return EmitBuiltinIsAligned(E);
5273 case Builtin::BI__builtin_align_up:
5274 return EmitBuiltinAlignTo(E, true);
5275 case Builtin::BI__builtin_align_down:
5276 return EmitBuiltinAlignTo(E, false);
5277
5278 case Builtin::BI__noop:
5279 // __noop always evaluates to an integer literal zero.
5280 return RValue::get(ConstantInt::get(IntTy, 0));
5281 case Builtin::BI__builtin_call_with_static_chain: {
5282 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5283 const Expr *Chain = E->getArg(1);
5284 return EmitCall(Call->getCallee()->getType(),
5285 EmitCallee(Call->getCallee()), Call, ReturnValue,
5286 EmitScalarExpr(Chain));
5287 }
5288 case Builtin::BI_InterlockedExchange8:
5289 case Builtin::BI_InterlockedExchange16:
5290 case Builtin::BI_InterlockedExchange:
5291 case Builtin::BI_InterlockedExchangePointer:
5292 return RValue::get(
5293 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5294 case Builtin::BI_InterlockedCompareExchangePointer:
5295 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5296 llvm::Type *RTy;
5297 llvm::IntegerType *IntType = IntegerType::get(
5298 getLLVMContext(), getContext().getTypeSize(E->getType()));
5299
5300 Address DestAddr = CheckAtomicAlignment(*this, E);
5301
5302 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5303 RTy = Exchange->getType();
5304 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5305
5306 llvm::Value *Comparand =
5307 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5308
5309 auto Ordering =
5310 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5311 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5312
5313 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5314 Ordering, Ordering);
5315 Result->setVolatile(true);
5316
5317 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5318 0),
5319 RTy));
5320 }
5321 case Builtin::BI_InterlockedCompareExchange8:
5322 case Builtin::BI_InterlockedCompareExchange16:
5323 case Builtin::BI_InterlockedCompareExchange:
5324 case Builtin::BI_InterlockedCompareExchange64:
5325 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5326 case Builtin::BI_InterlockedIncrement16:
5327 case Builtin::BI_InterlockedIncrement:
5328 return RValue::get(
5329 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5330 case Builtin::BI_InterlockedDecrement16:
5331 case Builtin::BI_InterlockedDecrement:
5332 return RValue::get(
5333 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5334 case Builtin::BI_InterlockedAnd8:
5335 case Builtin::BI_InterlockedAnd16:
5336 case Builtin::BI_InterlockedAnd:
5337 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5338 case Builtin::BI_InterlockedExchangeAdd8:
5339 case Builtin::BI_InterlockedExchangeAdd16:
5340 case Builtin::BI_InterlockedExchangeAdd:
5341 return RValue::get(
5342 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5343 case Builtin::BI_InterlockedExchangeSub8:
5344 case Builtin::BI_InterlockedExchangeSub16:
5345 case Builtin::BI_InterlockedExchangeSub:
5346 return RValue::get(
5347 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5348 case Builtin::BI_InterlockedOr8:
5349 case Builtin::BI_InterlockedOr16:
5350 case Builtin::BI_InterlockedOr:
5351 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5352 case Builtin::BI_InterlockedXor8:
5353 case Builtin::BI_InterlockedXor16:
5354 case Builtin::BI_InterlockedXor:
5355 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5356
5357 case Builtin::BI_bittest64:
5358 case Builtin::BI_bittest:
5359 case Builtin::BI_bittestandcomplement64:
5360 case Builtin::BI_bittestandcomplement:
5361 case Builtin::BI_bittestandreset64:
5362 case Builtin::BI_bittestandreset:
5363 case Builtin::BI_bittestandset64:
5364 case Builtin::BI_bittestandset:
5365 case Builtin::BI_interlockedbittestandreset:
5366 case Builtin::BI_interlockedbittestandreset64:
5367 case Builtin::BI_interlockedbittestandset64:
5368 case Builtin::BI_interlockedbittestandset:
5369 case Builtin::BI_interlockedbittestandset_acq:
5370 case Builtin::BI_interlockedbittestandset_rel:
5371 case Builtin::BI_interlockedbittestandset_nf:
5372 case Builtin::BI_interlockedbittestandreset_acq:
5373 case Builtin::BI_interlockedbittestandreset_rel:
5374 case Builtin::BI_interlockedbittestandreset_nf:
5375 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5376
5377 // These builtins exist to emit regular volatile loads and stores not
5378 // affected by the -fms-volatile setting.
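// For example (sketch of the intended lowering), __iso_volatile_load32(p)
// becomes a plain
//   %v = load volatile i32, ptr %p
// even under /volatile:ms, which would otherwise give ordinary volatile
// accesses extra atomic semantics.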
5379 case Builtin::BI__iso_volatile_load8:
5380 case Builtin::BI__iso_volatile_load16:
5381 case Builtin::BI__iso_volatile_load32:
5382 case Builtin::BI__iso_volatile_load64:
5383 return RValue::get(EmitISOVolatileLoad(*this, E));
5384 case Builtin::BI__iso_volatile_store8:
5385 case Builtin::BI__iso_volatile_store16:
5386 case Builtin::BI__iso_volatile_store32:
5387 case Builtin::BI__iso_volatile_store64:
5388 return RValue::get(EmitISOVolatileStore(*this, E));
5389
5390 case Builtin::BI__builtin_ptrauth_sign_constant:
5391 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5392
5393 case Builtin::BI__builtin_ptrauth_auth:
5394 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5395 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5396 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5397 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5398 case Builtin::BI__builtin_ptrauth_strip: {
5399 // Emit the arguments.
5400 SmallVector<llvm::Value *, 5> Args;
5401 for (auto argExpr : E->arguments())
5402 Args.push_back(EmitScalarExpr(argExpr));
5403
5404 // Cast the value to intptr_t, saving its original type.
5405 llvm::Type *OrigValueType = Args[0]->getType();
5406 if (OrigValueType->isPointerTy())
5407 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5408
5409 switch (BuiltinID) {
5410 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5411 if (Args[4]->getType()->isPointerTy())
5412 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5413 [[fallthrough]];
5414
5415 case Builtin::BI__builtin_ptrauth_auth:
5416 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5417 if (Args[2]->getType()->isPointerTy())
5418 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5419 break;
5420
5421 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5422 if (Args[1]->getType()->isPointerTy())
5423 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5424 break;
5425
5426 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5427 case Builtin::BI__builtin_ptrauth_strip:
5428 break;
5429 }
5430
5431 // Call the intrinsic.
5432 auto IntrinsicID = [&]() -> unsigned {
5433 switch (BuiltinID) {
5434 case Builtin::BI__builtin_ptrauth_auth:
5435 return llvm::Intrinsic::ptrauth_auth;
5436 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5437 return llvm::Intrinsic::ptrauth_resign;
5438 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5439 return llvm::Intrinsic::ptrauth_blend;
5440 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5441 return llvm::Intrinsic::ptrauth_sign_generic;
5442 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5443 return llvm::Intrinsic::ptrauth_sign;
5444 case Builtin::BI__builtin_ptrauth_strip:
5445 return llvm::Intrinsic::ptrauth_strip;
5446 }
5447 llvm_unreachable("bad ptrauth intrinsic");
5448 }();
5449 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5450 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5451
5452 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5453 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5454 OrigValueType->isPointerTy()) {
5455 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5456 }
5457 return RValue::get(Result);
5458 }
5459
5460 case Builtin::BI__exception_code:
5461 case Builtin::BI_exception_code:
5462 return RValue::get(EmitSEHExceptionCode());
5463 case Builtin::BI__exception_info:
5464 case Builtin::BI_exception_info:
5465 return RValue::get(EmitSEHExceptionInfo());
5466 case Builtin::BI__abnormal_termination:
5467 case Builtin::BI_abnormal_termination:
5468 return RValue::get(EmitSEHAbnormalTermination());
5469 case Builtin::BI_setjmpex:
5470 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5471 E->getArg(0)->getType()->isPointerType())
5472 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5473 break;
5474 case Builtin::BI_setjmp:
5475 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5476 E->getArg(0)->getType()->isPointerType()) {
5477 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5478 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5479 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5480 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5481 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5482 }
5483 break;
5484
5485 // C++ std:: builtins.
5486 case Builtin::BImove:
5487 case Builtin::BImove_if_noexcept:
5488 case Builtin::BIforward:
5489 case Builtin::BIforward_like:
5490 case Builtin::BIas_const:
5491 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5492 case Builtin::BI__GetExceptionInfo: {
5493 if (llvm::GlobalVariable *GV =
5494 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5495 return RValue::get(GV);
5496 break;
5497 }
5498
5499 case Builtin::BI__fastfail:
5500 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5501
5502 case Builtin::BI__builtin_coro_id:
5503 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5504 case Builtin::BI__builtin_coro_promise:
5505 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5506 case Builtin::BI__builtin_coro_resume:
5507 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5508 return RValue::get(nullptr);
5509 case Builtin::BI__builtin_coro_frame:
5510 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5511 case Builtin::BI__builtin_coro_noop:
5512 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5513 case Builtin::BI__builtin_coro_free:
5514 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5515 case Builtin::BI__builtin_coro_destroy:
5516 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5517 return RValue::get(nullptr);
5518 case Builtin::BI__builtin_coro_done:
5519 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5520 case Builtin::BI__builtin_coro_alloc:
5521 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5522 case Builtin::BI__builtin_coro_begin:
5523 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5524 case Builtin::BI__builtin_coro_end:
5525 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5526 case Builtin::BI__builtin_coro_suspend:
5527 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5528 case Builtin::BI__builtin_coro_size:
5529 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5530 case Builtin::BI__builtin_coro_align:
5531 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5532
5533 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5534 case Builtin::BIread_pipe:
5535 case Builtin::BIwrite_pipe: {
5536 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5537 *Arg1 = EmitScalarExpr(E->getArg(1));
5538 CGOpenCLRuntime OpenCLRT(CGM);
5539 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5540 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5541
5542 // Type of the generic packet parameter.
5543 unsigned GenericAS =
5544 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5545 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5546
5547 // Testing which overloaded version we should generate the call for.
5548 if (2U == E->getNumArgs()) {
5549 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5550 : "__write_pipe_2";
5551 // Creating a generic function type to be able to call with any builtin or
5552 // user defined type.
5553 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5554 llvm::FunctionType *FTy = llvm::FunctionType::get(
5555 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5556 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5557 return RValue::get(
5558 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5559 {Arg0, BCast, PacketSize, PacketAlign}));
5560 } else {
5561 assert(4 == E->getNumArgs() &&
5562 "Illegal number of parameters to pipe function");
5563 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5564 : "__write_pipe_4";
5565
5566 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5567 Int32Ty, Int32Ty};
5568 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5569 *Arg3 = EmitScalarExpr(E->getArg(3));
5570 llvm::FunctionType *FTy = llvm::FunctionType::get(
5571 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5572 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5573 // We know the third argument is an integer type, but we may need to cast
5574 // it to i32.
5575 if (Arg2->getType() != Int32Ty)
5576 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5577 return RValue::get(
5578 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5579 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5580 }
5581 }
5582 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
5583 // functions
5584 case Builtin::BIreserve_read_pipe:
5585 case Builtin::BIreserve_write_pipe:
5586 case Builtin::BIwork_group_reserve_read_pipe:
5587 case Builtin::BIwork_group_reserve_write_pipe:
5588 case Builtin::BIsub_group_reserve_read_pipe:
5589 case Builtin::BIsub_group_reserve_write_pipe: {
5590 // Composing the mangled name for the function.
5591 const char *Name;
5592 if (BuiltinID == Builtin::BIreserve_read_pipe)
5593 Name = "__reserve_read_pipe";
5594 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5595 Name = "__reserve_write_pipe";
5596 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5597 Name = "__work_group_reserve_read_pipe";
5598 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5599 Name = "__work_group_reserve_write_pipe";
5600 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5601 Name = "__sub_group_reserve_read_pipe";
5602 else
5603 Name = "__sub_group_reserve_write_pipe";
5604
5605 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5606 *Arg1 = EmitScalarExpr(E->getArg(1));
5607 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5608 CGOpenCLRuntime OpenCLRT(CGM);
5609 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5610 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5611
5612 // Building the generic function prototype.
5613 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5614 llvm::FunctionType *FTy = llvm::FunctionType::get(
5615 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5616 // We know the second argument is an integer type, but we may need to cast
5617 // it to i32.
5618 if (Arg1->getType() != Int32Ty)
5619 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5620 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5621 {Arg0, Arg1, PacketSize, PacketAlign}));
5622 }
5623 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5624 // functions
5625 case Builtin::BIcommit_read_pipe:
5626 case Builtin::BIcommit_write_pipe:
5627 case Builtin::BIwork_group_commit_read_pipe:
5628 case Builtin::BIwork_group_commit_write_pipe:
5629 case Builtin::BIsub_group_commit_read_pipe:
5630 case Builtin::BIsub_group_commit_write_pipe: {
5631 const char *Name;
5632 if (BuiltinID == Builtin::BIcommit_read_pipe)
5633 Name = "__commit_read_pipe";
5634 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5635 Name = "__commit_write_pipe";
5636 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5637 Name = "__work_group_commit_read_pipe";
5638 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5639 Name = "__work_group_commit_write_pipe";
5640 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5641 Name = "__sub_group_commit_read_pipe";
5642 else
5643 Name = "__sub_group_commit_write_pipe";
5644
5645 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5646 *Arg1 = EmitScalarExpr(E->getArg(1));
5647 CGOpenCLRuntime OpenCLRT(CGM);
5648 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5649 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5650
5651 // Building the generic function prototype.
5652 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5653 llvm::FunctionType *FTy =
5654 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5655 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5656
5657 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5658 {Arg0, Arg1, PacketSize, PacketAlign}));
5659 }
5660 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5661 case Builtin::BIget_pipe_num_packets:
5662 case Builtin::BIget_pipe_max_packets: {
5663 const char *BaseName;
5664 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5665 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5666 BaseName = "__get_pipe_num_packets";
5667 else
5668 BaseName = "__get_pipe_max_packets";
5669 std::string Name = std::string(BaseName) +
5670 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5671
5672 // Building the generic function prototype.
5673 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5674 CGOpenCLRuntime OpenCLRT(CGM);
5675 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5676 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5677 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5678 llvm::FunctionType *FTy = llvm::FunctionType::get(
5679 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5680
5681 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5682 {Arg0, PacketSize, PacketAlign}));
5683 }
5684
5685 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5686 case Builtin::BIto_global:
5687 case Builtin::BIto_local:
5688 case Builtin::BIto_private: {
5689 auto Arg0 = EmitScalarExpr(E->getArg(0));
5690 auto NewArgT = llvm::PointerType::get(
5691 getLLVMContext(),
5692 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5693 auto NewRetT = llvm::PointerType::get(
5694 getLLVMContext(),
5695 CGM.getContext().getTargetAddressSpace(
5696 E->getType()->getPointeeType().getAddressSpace()));
5697 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5698 llvm::Value *NewArg;
5699 if (Arg0->getType()->getPointerAddressSpace() !=
5700 NewArgT->getPointerAddressSpace())
5701 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5702 else
5703 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5704 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5705 auto NewCall =
5706 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5707 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5708 ConvertType(E->getType())));
5709 }
5710
5711 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5712 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5713 // The code below expands the builtin call to a call to one of the following
5714 // functions that an OpenCL runtime library will have to provide:
5715 // __enqueue_kernel_basic
5716 // __enqueue_kernel_varargs
5717 // __enqueue_kernel_basic_events
5718 // __enqueue_kernel_events_varargs
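// Roughly (a sketch of the mapping, not a full specification):
//   enqueue_kernel(q, flags, ndrange, block)                    -> __enqueue_kernel_basic
//   enqueue_kernel(q, flags, ndrange, block, local_sizes...)    -> __enqueue_kernel_varargs
//   enqueue_kernel(q, flags, ndrange, n, wait_list, ret, block) -> __enqueue_kernel_basic_events
//   ... the same form with trailing local sizes                 -> __enqueue_kernel_events_varargs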
5719 case Builtin::BIenqueue_kernel: {
5720 StringRef Name; // Generated function call name
5721 unsigned NumArgs = E->getNumArgs();
5722
5723 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5724 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5725 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5726
5727 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5728 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5729 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5730 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
5731 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
5732
5733 if (NumArgs == 4) {
5734 // The most basic form of the call with parameters:
5735 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5736 Name = "__enqueue_kernel_basic";
5737 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5738 GenericVoidPtrTy};
5739 llvm::FunctionType *FTy = llvm::FunctionType::get(
5740 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5741
5742 auto Info =
5743 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5744 llvm::Value *Kernel =
5745 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5746 llvm::Value *Block =
5747 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5748
5749 AttrBuilder B(Builder.getContext());
5750 B.addByValAttr(NDRangeL.getAddress().getElementType());
5751 llvm::AttributeList ByValAttrSet =
5752 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5753
5754 auto RTCall =
5755 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5756 {Queue, Flags, Range, Kernel, Block});
5757 RTCall->setAttributes(ByValAttrSet);
5758 return RValue::get(RTCall);
5759 }
5760 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5761
5762 // Create a temporary array to hold the sizes of local pointer arguments
5763 // for the block. \p First is the position of the first size argument.
5764 auto CreateArrayForSizeVar = [=](unsigned First)
5765 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5766 llvm::APInt ArraySize(32, NumArgs - First);
5767 QualType SizeArrayTy = getContext().getConstantArrayType(
5768 getContext().getSizeType(), ArraySize, nullptr,
5769 ArraySizeModifier::Normal,
5770 /*IndexTypeQuals=*/0);
5771 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5772 llvm::Value *TmpPtr = Tmp.getPointer();
5773 llvm::Value *TmpSize = EmitLifetimeStart(
5774 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5775 llvm::Value *ElemPtr;
5776 // Each of the following arguments specifies the size of the corresponding
5777 // argument passed to the enqueued block.
5778 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5779 for (unsigned I = First; I < NumArgs; ++I) {
5780 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5781 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5782 {Zero, Index});
5783 if (I == First)
5784 ElemPtr = GEP;
5785 auto *V =
5786 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5787 Builder.CreateAlignedStore(
5788 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5789 }
5790 return std::tie(ElemPtr, TmpSize, TmpPtr);
5791 };
5792
5793 // Could have events and/or varargs.
5794 if (E->getArg(3)->getType()->isBlockPointerType()) {
5795 // No events passed, but has variadic arguments.
5796 Name = "__enqueue_kernel_varargs";
5797 auto Info =
5798 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5799 llvm::Value *Kernel =
5800 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5801 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5802 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5803 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5804
5805 // Create a vector of the arguments, as well as a constant value to
5806 // express to the runtime the number of variadic arguments.
5807 llvm::Value *const Args[] = {Queue, Flags,
5808 Range, Kernel,
5809 Block, ConstantInt::get(IntTy, NumArgs - 4),
5810 ElemPtr};
5811 llvm::Type *const ArgTys[] = {
5812 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5813 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5814
5815 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5816 auto Call = RValue::get(
5817 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5818 if (TmpSize)
5819 EmitLifetimeEnd(TmpSize, TmpPtr);
5820 return Call;
5821 }
5822 // Any calls now have event arguments passed.
5823 if (NumArgs >= 7) {
5824 llvm::PointerType *PtrTy = llvm::PointerType::get(
5825 CGM.getLLVMContext(),
5826 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5827
5828 llvm::Value *NumEvents =
5829 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5830
5831 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
5832 // to be a null pointer constant (including `0` literal), we can take it
5833 // into account and emit null pointer directly.
5834 llvm::Value *EventWaitList = nullptr;
5835 if (E->getArg(4)->isNullPointerConstant(
5836 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5837 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5838 } else {
5839 EventWaitList =
5840 E->getArg(4)->getType()->isArrayType()
5841 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
5842 : EmitScalarExpr(E->getArg(4));
5843 // Convert to generic address space.
5844 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5845 }
5846 llvm::Value *EventRet = nullptr;
5847 if (E->getArg(5)->isNullPointerConstant(
5848 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5849 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5850 } else {
5851 EventRet =
5852 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5853 }
5854
5855 auto Info =
5856 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5857 llvm::Value *Kernel =
5858 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5859 llvm::Value *Block =
5860 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5861
5862 std::vector<llvm::Type *> ArgTys = {
5863 QueueTy, Int32Ty, RangeTy, Int32Ty,
5864 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5865
5866 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5867 NumEvents, EventWaitList, EventRet,
5868 Kernel, Block};
5869
5870 if (NumArgs == 7) {
5871 // Has events but no variadics.
5872 Name = "__enqueue_kernel_basic_events";
5873 llvm::FunctionType *FTy = llvm::FunctionType::get(
5874 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5875 return RValue::get(
5876 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5877 llvm::ArrayRef<llvm::Value *>(Args)));
5878 }
5879 // Has event info and variadics
5880 // Pass the number of variadics to the runtime function too.
5881 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5882 ArgTys.push_back(Int32Ty);
5883 Name = "__enqueue_kernel_events_varargs";
5884
5885 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5886 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5887 Args.push_back(ElemPtr);
5888 ArgTys.push_back(ElemPtr->getType());
5889
5890 llvm::FunctionType *FTy = llvm::FunctionType::get(
5891 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5892 auto Call =
5893 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5894 llvm::ArrayRef<llvm::Value *>(Args)));
5895 if (TmpSize)
5896 EmitLifetimeEnd(TmpSize, TmpPtr);
5897 return Call;
5898 }
5899 llvm_unreachable("Unexpected enqueue_kernel signature");
5900 }
5901 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5902 // parameter.
5903 case Builtin::BIget_kernel_work_group_size: {
5904 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5905 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5906 auto Info =
5907 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5908 Value *Kernel =
5909 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5910 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5911 return RValue::get(EmitRuntimeCall(
5912 CGM.CreateRuntimeFunction(
5913 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5914 false),
5915 "__get_kernel_work_group_size_impl"),
5916 {Kernel, Arg}));
5917 }
5918 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5919 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5920 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5921 auto Info =
5922 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5923 Value *Kernel =
5924 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5925 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5926 return RValue::get(EmitRuntimeCall(
5927 CGM.CreateRuntimeFunction(
5928 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5929 false),
5930 "__get_kernel_preferred_work_group_size_multiple_impl"),
5931 {Kernel, Arg}));
5932 }
5933 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5934 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5935 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5936 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5937 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5938 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
5939 auto Info =
5940 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5941 Value *Kernel =
5942 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5943 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5944 const char *Name =
5945 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5946 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5947 : "__get_kernel_sub_group_count_for_ndrange_impl";
5948 return RValue::get(EmitRuntimeCall(
5949 CGM.CreateRuntimeFunction(
5950 llvm::FunctionType::get(
5951 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5952 false),
5953 Name),
5954 {NDRange, Kernel, Block}));
5955 }
5956 case Builtin::BI__builtin_store_half:
5957 case Builtin::BI__builtin_store_halff: {
5958 Value *Val = EmitScalarExpr(E->getArg(0));
5959 Address Address = EmitPointerWithAlignment(E->getArg(1));
5960 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5961 Builder.CreateStore(HalfVal, Address);
5962 return RValue::get(nullptr);
5963 }
5964 case Builtin::BI__builtin_load_half: {
5965 Address Address = EmitPointerWithAlignment(E->getArg(0));
5966 Value *HalfVal = Builder.CreateLoad(Address);
5967 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5968 }
5969 case Builtin::BI__builtin_load_halff: {
5970 Address Address = EmitPointerWithAlignment(E->getArg(0));
5971 Value *HalfVal = Builder.CreateLoad(Address);
5972 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5973 }
5974 case Builtin::BI__builtin_printf:
5975 case Builtin::BIprintf:
5976 if (getTarget().getTriple().isNVPTX() ||
5977 getTarget().getTriple().isAMDGCN() ||
5978 (getTarget().getTriple().isSPIRV() &&
5979 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
5980 if (getTarget().getTriple().isNVPTX())
5981 return EmitNVPTXDevicePrintfCallExpr(E);
5982 if ((getTarget().getTriple().isAMDGCN() ||
5983 getTarget().getTriple().isSPIRV()) &&
5984 getLangOpts().HIP)
5985 return EmitAMDGPUDevicePrintfCallExpr(E);
5986 }
5987
5988 break;
5989 case Builtin::BI__builtin_canonicalize:
5990 case Builtin::BI__builtin_canonicalizef:
5991 case Builtin::BI__builtin_canonicalizef16:
5992 case Builtin::BI__builtin_canonicalizel:
5993 return RValue::get(
5994 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
5995
5996 case Builtin::BI__builtin_thread_pointer: {
5997 if (!getContext().getTargetInfo().isTLSSupported())
5998 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5999 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6000 break;
6001 }
6002 case Builtin::BI__builtin_os_log_format:
6003 return emitBuiltinOSLogFormat(*E);
6004
6005 case Builtin::BI__xray_customevent: {
6006 if (!ShouldXRayInstrumentFunction())
6007 return RValue::getIgnored();
6008
6009 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6010 XRayInstrKind::Custom))
6011 return RValue::getIgnored();
6012
6013 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6014 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6015 return RValue::getIgnored();
6016
6017 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6018 auto FTy = F->getFunctionType();
6019 auto Arg0 = E->getArg(0);
6020 auto Arg0Val = EmitScalarExpr(Arg0);
6021 auto Arg0Ty = Arg0->getType();
6022 auto PTy0 = FTy->getParamType(0);
6023 if (PTy0 != Arg0Val->getType()) {
6024 if (Arg0Ty->isArrayType())
6025 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6026 else
6027 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6028 }
6029 auto Arg1 = EmitScalarExpr(E->getArg(1));
6030 auto PTy1 = FTy->getParamType(1);
6031 if (PTy1 != Arg1->getType())
6032 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6033 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6034 }
6035
6036 case Builtin::BI__xray_typedevent: {
6037 // TODO: There should be a way to always emit events even if the current
6038 // function is not instrumented. Losing events in a stream can cripple
6039 // a trace.
6040 if (!ShouldXRayInstrumentFunction())
6041 return RValue::getIgnored();
6042
6043 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6044 XRayInstrKind::Typed))
6045 return RValue::getIgnored();
6046
6047 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6048 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6049 return RValue::getIgnored();
6050
6051 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6052 auto FTy = F->getFunctionType();
6053 auto Arg0 = EmitScalarExpr(E->getArg(0));
6054 auto PTy0 = FTy->getParamType(0);
6055 if (PTy0 != Arg0->getType())
6056 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6057 auto Arg1 = E->getArg(1);
6058 auto Arg1Val = EmitScalarExpr(Arg1);
6059 auto Arg1Ty = Arg1->getType();
6060 auto PTy1 = FTy->getParamType(1);
6061 if (PTy1 != Arg1Val->getType()) {
6062 if (Arg1Ty->isArrayType())
6063 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6064 else
6065 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6066 }
6067 auto Arg2 = EmitScalarExpr(E->getArg(2));
6068 auto PTy2 = FTy->getParamType(2);
6069 if (PTy2 != Arg2->getType())
6070 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6071 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6072 }
6073
6074 case Builtin::BI__builtin_ms_va_start:
6075 case Builtin::BI__builtin_ms_va_end:
6076 return RValue::get(
6077 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6078 BuiltinID == Builtin::BI__builtin_ms_va_start));
6079
6080 case Builtin::BI__builtin_ms_va_copy: {
6081 // Lower this manually. We can't reliably determine whether or not any
6082 // given va_copy() is for a Win64 va_list from the calling convention
6083 // alone, because it's legal to do this from a System V ABI function.
6084 // With opaque pointer types, we won't have enough information in LLVM
6085 // IR to determine this from the argument types, either. Best to do it
6086 // now, while we have enough information.
6087 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6088 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6089
6090 DestAddr = DestAddr.withElementType(Int8PtrTy);
6091 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6092
6093 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6094 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6095 }
6096
6097 case Builtin::BI__builtin_get_device_side_mangled_name: {
6098 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6099 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6100 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6101 return RValue::get(Str.getPointer());
6102 }
6103 }
6104
6105 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6106 // the call using the normal call path, but using the unmangled
6107 // version of the function name.
6108 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6109 return emitLibraryCall(*this, FD, E,
6110 CGM.getBuiltinLibFunction(FD, BuiltinID));
6111
6112 // If this is a predefined lib function (e.g. malloc), emit the call
6113 // using exactly the normal call path.
6114 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6115 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6116
6117 // Check that a call to a target specific builtin has the correct target
6118 // features.
6119 // This is down here to avoid non-target specific builtins, however, if
6120 // generic builtins start to require generic target features then we
6121 // can move this up to the beginning of the function.
6122 checkTargetFeatures(E, FD);
6123
6124 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6125 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6126
6127 // See if we have a target specific intrinsic.
6128 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6129 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6130 StringRef Prefix =
6131 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6132 if (!Prefix.empty()) {
6133 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6134 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6135 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6136 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6137 // NOTE we don't need to perform a compatibility flag check here since the
6138 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
6139 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
6140 if (IntrinsicID == Intrinsic::not_intrinsic)
6141 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6142 }
6143
6144 if (IntrinsicID != Intrinsic::not_intrinsic) {
6145 SmallVector<Value *, 16> Args;
6146
6147 // Find out if any arguments are required to be integer constant
6148 // expressions.
6149 unsigned ICEArguments = 0;
6150 ASTContext::GetBuiltinTypeError Error;
6151 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6152 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6153
6154 Function *F = CGM.getIntrinsic(IntrinsicID);
6155 llvm::FunctionType *FTy = F->getFunctionType();
6156
6157 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6158 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6159 // If the intrinsic arg type is different from the builtin arg type
6160 // we need to do a bit cast.
6161 llvm::Type *PTy = FTy->getParamType(i);
6162 if (PTy != ArgValue->getType()) {
6163 // XXX - vector of pointers?
6164 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6165 if (PtrTy->getAddressSpace() !=
6166 ArgValue->getType()->getPointerAddressSpace()) {
6167 ArgValue = Builder.CreateAddrSpaceCast(
6168 ArgValue, llvm::PointerType::get(getLLVMContext(),
6169 PtrTy->getAddressSpace()));
6170 }
6171 }
6172
6173 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6174 // in AMX intrinsics.
6175 if (PTy->isX86_AMXTy())
6176 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6177 {ArgValue->getType()}, {ArgValue});
6178 else
6179 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6180 }
6181
6182 Args.push_back(ArgValue);
6183 }
6184
6185 Value *V = Builder.CreateCall(F, Args);
6186 QualType BuiltinRetType = E->getType();
6187
6188 llvm::Type *RetTy = VoidTy;
6189 if (!BuiltinRetType->isVoidType())
6190 RetTy = ConvertType(BuiltinRetType);
6191
6192 if (RetTy != V->getType()) {
6193 // XXX - vector of pointers?
6194 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6195 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6196 V = Builder.CreateAddrSpaceCast(
6197 V, llvm::PointerType::get(getLLVMContext(),
6198 PtrTy->getAddressSpace()));
6199 }
6200 }
6201
6202 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6203 // in AMX intrinsics.
6204 if (V->getType()->isX86_AMXTy())
6205 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6206 {V});
6207 else
6208 V = Builder.CreateBitCast(V, RetTy);
6209 }
6210
6211 if (RetTy->isVoidTy())
6212 return RValue::get(nullptr);
6213
6214 return RValue::get(V);
6215 }
6216
6217 // Some target-specific builtins can have aggregate return values, e.g.
6218 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6219 // ReturnValue to be non-null, so that the target-specific emission code can
6220 // always just emit into it.
6221 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6222 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6223 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6224 ReturnValue = ReturnValueSlot(DestPtr, false);
6225 }
6226
6227 // Now see if we can emit a target-specific builtin.
6228 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6229 switch (EvalKind) {
6230 case TEK_Scalar:
6231 if (V->getType()->isVoidTy())
6232 return RValue::get(nullptr);
6233 return RValue::get(V);
6234 case TEK_Aggregate:
6235 return RValue::getAggregate(ReturnValue.getAddress(),
6236 ReturnValue.isVolatile());
6237 case TEK_Complex:
6238 llvm_unreachable("No current target builtin returns complex");
6239 }
6240 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6241 }
6242
6243 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6244 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6245 return RValue::get(V);
6246
6247 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6248 return EmitHipStdParUnsupportedBuiltin(this, FD);
6249
6250 ErrorUnsupported(E, "builtin function");
6251
6252 // Unknown builtin, for now just dump it out and return undef.
6253 return GetUndefRValue(E->getType());
6254}
6255
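// Dispatch a target-specific builtin to the per-architecture emitter selected
// by Arch, which may be the aux target's architecture when offloading.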
6256 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6257 unsigned BuiltinID, const CallExpr *E,
6258 ReturnValueSlot ReturnValue,
6259 llvm::Triple::ArchType Arch) {
6260 // When compiling in HipStdPar mode we have to be conservative in rejecting
6261 // target specific features in the FE, and defer the possible error to the
6262 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6263 // referenced by an accelerator executable function, we emit an error.
6264 // Returning nullptr here leads to the builtin being handled in
6265 // EmitStdParUnsupportedBuiltin.
6266 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6267 Arch != CGF->getTarget().getTriple().getArch())
6268 return nullptr;
6269
6270 switch (Arch) {
6271 case llvm::Triple::arm:
6272 case llvm::Triple::armeb:
6273 case llvm::Triple::thumb:
6274 case llvm::Triple::thumbeb:
6275 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6276 case llvm::Triple::aarch64:
6277 case llvm::Triple::aarch64_32:
6278 case llvm::Triple::aarch64_be:
6279 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6280 case llvm::Triple::bpfeb:
6281 case llvm::Triple::bpfel:
6282 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6283 case llvm::Triple::x86:
6284 case llvm::Triple::x86_64:
6285 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6286 case llvm::Triple::ppc:
6287 case llvm::Triple::ppcle:
6288 case llvm::Triple::ppc64:
6289 case llvm::Triple::ppc64le:
6290 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6291 case llvm::Triple::r600:
6292 case llvm::Triple::amdgcn:
6293 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6294 case llvm::Triple::systemz:
6295 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6296 case llvm::Triple::nvptx:
6297 case llvm::Triple::nvptx64:
6298 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6299 case llvm::Triple::wasm32:
6300 case llvm::Triple::wasm64:
6301 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6302 case llvm::Triple::hexagon:
6303 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6304 case llvm::Triple::riscv32:
6305 case llvm::Triple::riscv64:
6306 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6307 case llvm::Triple::spirv64:
6308 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6309 return nullptr;
6310 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6311 default:
6312 return nullptr;
6313 }
6314}
6315
6316 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6317 const CallExpr *E,
6318 ReturnValueSlot ReturnValue) {
6319 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6320 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6321 return EmitTargetArchBuiltinExpr(
6322 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6323 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6324 }
6325
6326 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6327 getTarget().getTriple().getArch());
6328}
6329
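// GetNeonType maps a NeonTypeFlags element kind to the corresponding LLVM
// fixed vector type; the quad flag doubles the lane count, and V1Ty requests
// a single-element vector.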
6330static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6331 NeonTypeFlags TypeFlags,
6332 bool HasLegalHalfType = true,
6333 bool V1Ty = false,
6334 bool AllowBFloatArgsAndRet = true) {
6335 int IsQuad = TypeFlags.isQuad();
6336 switch (TypeFlags.getEltType()) {
6337 case NeonTypeFlags::Int8:
6338 case NeonTypeFlags::Poly8:
6339 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6340 case NeonTypeFlags::Int16:
6341 case NeonTypeFlags::Poly16:
6342 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6343 case NeonTypeFlags::BFloat16:
6344 if (AllowBFloatArgsAndRet)
6345 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6346 else
6347 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6348 case NeonTypeFlags::Float16:
6349 if (HasLegalHalfType)
6350 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6351 else
6352 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6353 case NeonTypeFlags::Int32:
6354 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6355 case NeonTypeFlags::Int64:
6356 case NeonTypeFlags::Poly64:
6357 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6358 case NeonTypeFlags::Poly128:
6359 // FIXME: i128 and f128 don't get full support in Clang and LLVM;
6360 // much of the i128 and f128 API is missing, so we use v16i8 to
6361 // represent poly128 and get it pattern matched.
6362 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6363 case NeonTypeFlags::Float32:
6364 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6365 case NeonTypeFlags::Float64:
6366 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6367 }
6368 llvm_unreachable("Unknown vector element type!");
6369}
6370
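// GetFloatNeonType returns the floating-point vector type whose lane width
// matches the given integer Neon element type (i16 -> half, i32 -> float,
// i64 -> double).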
6371static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6372 NeonTypeFlags IntTypeFlags) {
6373 int IsQuad = IntTypeFlags.isQuad();
6374 switch (IntTypeFlags.getEltType()) {
6375 case NeonTypeFlags::Int16:
6376 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6377 case NeonTypeFlags::Int32:
6378 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6379 case NeonTypeFlags::Int64:
6380 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6381 default:
6382 llvm_unreachable("Type can't be converted to floating-point!");
6383 }
6384}
6385
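// EmitNeonSplat broadcasts the lane selected by the constant C across a
// vector with Count elements, via a shufflevector whose mask is a splat of C.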
6386 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6387 const ElementCount &Count) {
6388 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6389 return Builder.CreateShuffleVector(V, V, SV, "lane");
6390}
6391
6392 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6393 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6394 return EmitNeonSplat(V, C, EC);
6395}
6396
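// EmitNeonCall bitcasts each operand to the intrinsic's expected parameter
// type (or materializes the shift-amount vector for the operand at position
// 'shift') and then emits the call, using the constrained-FP builder when the
// intrinsic is a constrained FP intrinsic.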
6397 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
6398 const char *name,
6399 unsigned shift, bool rightshift) {
6400 unsigned j = 0;
6401 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6402 ai != ae; ++ai, ++j) {
6403 if (F->isConstrainedFPIntrinsic())
6404 if (ai->getType()->isMetadataTy())
6405 continue;
6406 if (shift > 0 && shift == j)
6407 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6408 else
6409 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6410 }
6411
6412 if (F->isConstrainedFPIntrinsic())
6413 return Builder.CreateConstrainedFPCall(F, Ops, name);
6414 else
6415 return Builder.CreateCall(F, Ops, name);
6416}
6417
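// EmitNeonShiftVector splats the constant shift amount (negated for right
// shifts) into a vector of type Ty.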
6418 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6419 bool neg) {
6420 int SV = cast<ConstantInt>(V)->getSExtValue();
6421 return ConstantInt::get(Ty, neg ? -SV : SV);
6422}
6423
6424// Right-shift a vector by a constant.
6425 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6426 llvm::Type *Ty, bool usgn,
6427 const char *name) {
6428 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6429
6430 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6431 int EltSize = VTy->getScalarSizeInBits();
6432
6433 Vec = Builder.CreateBitCast(Vec, Ty);
6434
6435 // lshr/ashr are undefined when the shift amount is equal to the vector
6436 // element size.
6437 if (ShiftAmt == EltSize) {
6438 if (usgn) {
6439 // Right-shifting an unsigned value by its size yields 0.
6440 return llvm::ConstantAggregateZero::get(VTy);
6441 } else {
6442 // Right-shifting a signed value by its size is equivalent
6443 // to a shift of size-1.
6444 --ShiftAmt;
6445 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6446 }
6447 }
6448
6449 Shift = EmitNeonShiftVector(Shift, Ty, false);
6450 if (usgn)
6451 return Builder.CreateLShr(Vec, Shift, name);
6452 else
6453 return Builder.CreateAShr(Vec, Shift, name);
6454}
6455
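// Flags used by the ARM/AArch64 vector intrinsic tables below to describe how
// a builtin's argument and return types map onto the overloaded types of the
// corresponding LLVM intrinsic.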
6456enum {
6457 AddRetType = (1 << 0),
6458 Add1ArgType = (1 << 1),
6459 Add2ArgTypes = (1 << 2),
6460
6461 VectorizeRetType = (1 << 3),
6462 VectorizeArgTypes = (1 << 4),
6463
6464 InventFloatType = (1 << 5),
6465 UnsignedAlts = (1 << 6),
6466
6467 Use64BitVectors = (1 << 7),
6468 Use128BitVectors = (1 << 8),
6469
6470 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6471 VectorRet = AddRetType | VectorizeRetType,
6472 VectorRetGetArgs01 =
6473 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6474 FpCmpzModifiers =
6475 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6476 };
6477
6478namespace {
6479struct ARMVectorIntrinsicInfo {
6480 const char *NameHint;
6481 unsigned BuiltinID;
6482 unsigned LLVMIntrinsic;
6483 unsigned AltLLVMIntrinsic;
6484 uint64_t TypeModifier;
6485
6486 bool operator<(unsigned RHSBuiltinID) const {
6487 return BuiltinID < RHSBuiltinID;
6488 }
6489 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6490 return BuiltinID < TE.BuiltinID;
6491 }
6492};
6493} // end anonymous namespace
6494
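// The NEONMAPn macros build ARMVectorIntrinsicInfo entries: NEONMAP0 marks a
// builtin that is lowered by custom code (no direct intrinsic), NEONMAP1 maps
// a builtin to a single LLVM intrinsic, and NEONMAP2 additionally records an
// alternate (typically unsigned) intrinsic.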
6495#define NEONMAP0(NameBase) \
6496 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6497
6498#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6499 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6500 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6501
6502#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6503 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6504 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6505 TypeModifier }
6506
6507static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6508 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6509 NEONMAP0(splat_lane_v),
6510 NEONMAP0(splat_laneq_v),
6511 NEONMAP0(splatq_lane_v),
6512 NEONMAP0(splatq_laneq_v),
6513 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6514 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6515 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6516 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6517 NEONMAP0(vadd_v),
6518 NEONMAP0(vaddhn_v),
6519 NEONMAP0(vaddq_v),
6520 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6521 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6522 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6523 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6524 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6525 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6526 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6527 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6528 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6529 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6530 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6531 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6532 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6533 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6534 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6535 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6536 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6537 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6538 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6539 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6540 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6541 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6542 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6543 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6544 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6545 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6546 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6547 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6548 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6549 NEONMAP0(vceqz_v),
6550 NEONMAP0(vceqzq_v),
6551 NEONMAP0(vcgez_v),
6552 NEONMAP0(vcgezq_v),
6553 NEONMAP0(vcgtz_v),
6554 NEONMAP0(vcgtzq_v),
6555 NEONMAP0(vclez_v),
6556 NEONMAP0(vclezq_v),
6557 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6558 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6559 NEONMAP0(vcltz_v),
6560 NEONMAP0(vcltzq_v),
6561 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6562 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6563 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6564 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6565 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6566 NEONMAP0(vcvt_f16_s16),
6567 NEONMAP0(vcvt_f16_u16),
6568 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6569 NEONMAP0(vcvt_f32_v),
6570 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6571 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6572 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6573 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6574 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6575 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6576 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6577 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6578 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6579 NEONMAP0(vcvt_s16_f16),
6580 NEONMAP0(vcvt_s32_v),
6581 NEONMAP0(vcvt_s64_v),
6582 NEONMAP0(vcvt_u16_f16),
6583 NEONMAP0(vcvt_u32_v),
6584 NEONMAP0(vcvt_u64_v),
6585 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6586 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6587 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6588 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6589 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6590 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6591 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6592 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6593 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6594 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6595 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6596 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6597 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6598 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6599 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6600 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6601 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6602 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6603 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6604 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6605 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6606 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6607 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6608 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6609 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6610 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6611 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6612 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6613 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6614 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6615 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6616 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6617 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6618 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6619 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6620 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6621 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6622 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6623 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6624 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6625 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6626 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6627 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6628 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6629 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6630 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6631 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6632 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6633 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6634 NEONMAP0(vcvtq_f16_s16),
6635 NEONMAP0(vcvtq_f16_u16),
6636 NEONMAP0(vcvtq_f32_v),
6637 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6638 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6639 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6640 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6641 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6642 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6643 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6644 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6645 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6646 NEONMAP0(vcvtq_s16_f16),
6647 NEONMAP0(vcvtq_s32_v),
6648 NEONMAP0(vcvtq_s64_v),
6649 NEONMAP0(vcvtq_u16_f16),
6650 NEONMAP0(vcvtq_u32_v),
6651 NEONMAP0(vcvtq_u64_v),
6652 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6653 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6654 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6655 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6656 NEONMAP0(vext_v),
6657 NEONMAP0(vextq_v),
6658 NEONMAP0(vfma_v),
6659 NEONMAP0(vfmaq_v),
6660 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6661 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6662 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6663 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6664 NEONMAP0(vld1_dup_v),
6665 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6666 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6667 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6668 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6669 NEONMAP0(vld1q_dup_v),
6670 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6671 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6672 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6673 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6674 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6675 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6676 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6677 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6678 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6679 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6680 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6681 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6682 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6683 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6684 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6685 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6686 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6687 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6688 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6689 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6690 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6691 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6692 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6693 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6694 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6695 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6696 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6697 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6698 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6699 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6700 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6701 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6702 NEONMAP0(vmovl_v),
6703 NEONMAP0(vmovn_v),
6704 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6705 NEONMAP0(vmull_v),
6706 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6707 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6708 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6709 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6710 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6711 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6712 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6713 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6714 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6715 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6716 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6717 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6718 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6719 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6720 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6721 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6722 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6723 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6724 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6725 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6726 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6727 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6728 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6729 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6730 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6731 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6732 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6733 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6734 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6735 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6736 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6737 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6738 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6739 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6740 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6741 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6742 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6743 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6744 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6745 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6746 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6747 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6748 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6749 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6750 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6751 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6752 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6753 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6754 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6755 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6756 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6757 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6758 NEONMAP0(vrndi_v),
6759 NEONMAP0(vrndiq_v),
6760 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6761 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6762 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6763 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6764 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6765 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6766 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6767 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6768 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6769 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6770 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6771 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6772 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6773 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6774 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6775 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6776 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6777 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6778 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6779 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6780 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6781 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6782 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6783 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6784 NEONMAP0(vshl_n_v),
6785 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6786 NEONMAP0(vshll_n_v),
6787 NEONMAP0(vshlq_n_v),
6788 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6789 NEONMAP0(vshr_n_v),
6790 NEONMAP0(vshrn_n_v),
6791 NEONMAP0(vshrq_n_v),
6792 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6793 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6794 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6795 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6796 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6797 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6798 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6799 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6800 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6801 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6802 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6803 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6804 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6805 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6806 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6807 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6808 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6809 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6810 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6811 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6812 NEONMAP0(vsubhn_v),
6813 NEONMAP0(vtrn_v),
6814 NEONMAP0(vtrnq_v),
6815 NEONMAP0(vtst_v),
6816 NEONMAP0(vtstq_v),
6817 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6818 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6819 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6820 NEONMAP0(vuzp_v),
6821 NEONMAP0(vuzpq_v),
6822 NEONMAP0(vzip_v),
6823 NEONMAP0(vzipq_v)
6824};
6825
6826static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6827 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6828 NEONMAP0(splat_lane_v),
6829 NEONMAP0(splat_laneq_v),
6830 NEONMAP0(splatq_lane_v),
6831 NEONMAP0(splatq_laneq_v),
6832 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6833 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6834 NEONMAP0(vadd_v),
6835 NEONMAP0(vaddhn_v),
6836 NEONMAP0(vaddq_p128),
6837 NEONMAP0(vaddq_v),
6838 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6839 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6840 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6841 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6842 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6843 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6844 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6845 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6846 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6847 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6848 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6849 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6850 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6851 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6852 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6853 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6854 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6855 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6856 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6857 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6858 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6859 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6860 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6861 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6862 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6863 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6864 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6865 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6866 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6867 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6868 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6869 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6870 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6871 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6872 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6873 NEONMAP0(vceqz_v),
6874 NEONMAP0(vceqzq_v),
6875 NEONMAP0(vcgez_v),
6876 NEONMAP0(vcgezq_v),
6877 NEONMAP0(vcgtz_v),
6878 NEONMAP0(vcgtzq_v),
6879 NEONMAP0(vclez_v),
6880 NEONMAP0(vclezq_v),
6881 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6882 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6883 NEONMAP0(vcltz_v),
6884 NEONMAP0(vcltzq_v),
6885 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6886 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6887 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6888 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6889 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6890 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6891 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6892 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6893 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6894 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6895 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6896 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6897 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6898 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6899 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6900 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6901 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6902 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6903 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6904 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6905 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6906 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6907 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6908 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6909 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6910 NEONMAP0(vcvt_f16_s16),
6911 NEONMAP0(vcvt_f16_u16),
6912 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6913 NEONMAP0(vcvt_f32_v),
6914 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6915 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6916 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6917 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6918 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6919 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6920 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6921 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6922 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6923 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6924 NEONMAP0(vcvtq_f16_s16),
6925 NEONMAP0(vcvtq_f16_u16),
6926 NEONMAP0(vcvtq_f32_v),
6927 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6928 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6929 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6930 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6931 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6932 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6933 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6934 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6935 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6936 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6937 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6938 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6939 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6940 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6941 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6942 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6943 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6944 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6945 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6946 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6947 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6948 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6949 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6950 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6951 NEONMAP0(vext_v),
6952 NEONMAP0(vextq_v),
6953 NEONMAP0(vfma_v),
6954 NEONMAP0(vfmaq_v),
6955 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6956 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6957 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6958 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6959 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6960 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6961 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6962 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6963 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6964 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6965 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6966 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6967 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6968 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6969 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6970 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6971 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6972 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6973 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6974 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6975 NEONMAP0(vmovl_v),
6976 NEONMAP0(vmovn_v),
6977 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6978 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6979 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6980 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6981 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6982 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6983 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6984 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6985 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6986 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6987 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6988 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6989 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6990 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6991 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6992 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6993 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6994 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6995 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6996 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6997 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6998 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6999 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7000 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7001 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7002 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7003 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7004 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7005 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7006 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7007 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7008 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7009 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7010 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7011 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7012 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7013 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7014 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7015 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7016 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7017 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7018 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
7019 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7020 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7021 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7022 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7023 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7024 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7025 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7026 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7027 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7028 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7029 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7030 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7031 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7032 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7033 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7034 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7035 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7036 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7037 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7038 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7039 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7040 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7041 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7042 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7043 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7044 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7045 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7046 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7047 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7048 NEONMAP0(vrndi_v),
7049 NEONMAP0(vrndiq_v),
7050 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7051 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7052 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7053 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7054 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7055 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7056 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7057 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7058 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7059 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7060 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7061 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7062 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7063 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7064 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7065 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7066 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7067 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7068 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7069 NEONMAP0(vshl_n_v),
7070 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7071 NEONMAP0(vshll_n_v),
7072 NEONMAP0(vshlq_n_v),
7073 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7074 NEONMAP0(vshr_n_v),
7075 NEONMAP0(vshrn_n_v),
7076 NEONMAP0(vshrq_n_v),
7077 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7078 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7079 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7080 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7081 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7082 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7083 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7084 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7085 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7086 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7087 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7088 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7089 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7090 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7091 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7092 NEONMAP0(vsubhn_v),
7093 NEONMAP0(vtst_v),
7094 NEONMAP0(vtstq_v),
7095 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7096 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7097 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7098 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7099};
7100
7101static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7102 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7103 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7104 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7105 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7106 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7107 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7108 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7109 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7110 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7111 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7112 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7113 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7114 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7115 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7116 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7117 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7118 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7119 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7120 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7121 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7122 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7123 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7124 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7125 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7126 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7127 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7128 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7129 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7130 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7131 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7132 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7133 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7134 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7135 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7136 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7137 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7138 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7139 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7140 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7141 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7142 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7143 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7144 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7145 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7146 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7147 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7148 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7149 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7150 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7151 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7152 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7153 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7154 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7155 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7156 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7157 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7158 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7159 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7160 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7161 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7162 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7163 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7164 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7165 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7166 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7167 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7168 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7169 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7170 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7171 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7172 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7173 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7174 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7175 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7176 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7177 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7178 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7179 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7180 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7181 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7182 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7183 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7184 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7185 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7186 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7187 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7188 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7189 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7190 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7191 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7192 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7193 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7194 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7195 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7196 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7197 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7198 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7199 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7200 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7201 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7202 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7203 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7204 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7205 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7206 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7207 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7208 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7209 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7210 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7211 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7212 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7213 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7214 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7215 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7216 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7217 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7218 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7219 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7220 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7221 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7222 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7223 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7224 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7225 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7226 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7227 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7228 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7229 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7230 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7231 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7232 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7233 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7234 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7235 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7236 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7237 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7238 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7239 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7240 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7241 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7242 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7243 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7244 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7245 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7246 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7247 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7248 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7249 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7250 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7251 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7252 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7253 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7254 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7255 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7256 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7257 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7258 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7259 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7260 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7261 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7262 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7263 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7264 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7265 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7266 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7267 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7268 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7269 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7270 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7271 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7272 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7273 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7274 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7275 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7276 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7277 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7278 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7279 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7280 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7281 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7282 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7283 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7284 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7285 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7286 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7287 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7288 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7289 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7290 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7291 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7292 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7293 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7294 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7295 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7296 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7297 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7298 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7299 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7300 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7301 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7302 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7303 // FP16 scalar intrinsics go here.
7304 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7305 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7306 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7307 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7308 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7309 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7310 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7311 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7312 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7313 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7314 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7315 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7316 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7317 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7318 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7319 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7320 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7321 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7322 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7323 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7324 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7325 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7326 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7327 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7328 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7329 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7330 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7331 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7332 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7333 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7334 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7335 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7336 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7337 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7338};
7339
7340// Some intrinsics are equivalent for codegen.
7341static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7342 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7343 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7344 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7345 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7346 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7347 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7348 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7349 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7350 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7351 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7352 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7353 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7354 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7355 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7356 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7357 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7358 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7359 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7360 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7361 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7362 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7363 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7364 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7365 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7366 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7367 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7368 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7369 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7370 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7371 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7372 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7373 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7374 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7375 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7376 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7377 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7378 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7379 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7380 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7381 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7382 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7383 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7384 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7385 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7386 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7387 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7388 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7389 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7390 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7391 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7392 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7393 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7394 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7395 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7396 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7397 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7398 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7399 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7400 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7401 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7402 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7403 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7404 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7405 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7406 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7407 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7408 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7409 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7410 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7411 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7412 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7413 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7414 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7415 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7416 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7417 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7418 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7419 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7420 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7421 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7422 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7423 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7424 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7425 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7426 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7427 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7428 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7429 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7430 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7431 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7432 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7433 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7434 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7435 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7436 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7437 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7438 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7439 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7440 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7441 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7442 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7443 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7444 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7445 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7446 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7447 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7448 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7449 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7450 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7451 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7452 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7453 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7454 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7455 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7456 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7457 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7458 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7459 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7460 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7461 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7462 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7463 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7464 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7465 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7466 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7467 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7468 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7469 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7470 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7471 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7472 // arbitrary one to be handled as the canonical variation.
7473 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7474 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7475 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7476 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7477 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7478 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7479 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7480 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7481 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7482 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7483 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7484 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7485};
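The equivalence table above lets codegen canonicalize an alias builtin ID before any further handling. Below is a minimal, self-contained sketch of that remapping step; it assumes only an {alias, canonical} pair table shaped like NEONEquivalentIntrinsicMap, and the helper name canonicalNeonBuiltinID is hypothetical.

#include <utility>
#include <vector>

// Return the canonical builtin ID for an alias, or the ID unchanged when the
// builtin has no entry in the equivalence table. (Illustrative sketch only.)
static unsigned
canonicalNeonBuiltinID(const std::vector<std::pair<unsigned, unsigned>> &Map,
                       unsigned BuiltinID) {
  for (const auto &Entry : Map)
    if (Entry.first == BuiltinID)
      return Entry.second; // Alias: reuse the canonical builtin's codegen path.
  return BuiltinID;        // Not an alias; handle the builtin directly.
}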
7486
7487#undef NEONMAP0
7488#undef NEONMAP1
7489#undef NEONMAP2
7490
7491#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7492 { \
7493 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7494 TypeModifier \
7495 }
7496
7497#define SVEMAP2(NameBase, TypeModifier) \
7498 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7499static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7500#define GET_SVE_LLVM_INTRINSIC_MAP
7501#include "clang/Basic/arm_sve_builtin_cg.inc"
7502#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7503#undef GET_SVE_LLVM_INTRINSIC_MAP
7504};
7505
7506#undef SVEMAP1
7507#undef SVEMAP2
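The SVEMAP1/SVEMAP2 macros above (and the SMEMAP pair below) use the familiar stringize-and-paste table idiom: a single NameBase token yields the entry's name string, its builtin enumerator, and, for SVEMAP1, its LLVM intrinsic ID. The self-contained toy version below mirrors that idiom; DemoInfo, DEMO_MAP1, demo_foo and the enum values are hypothetical names used only for illustration.

#include <cstdint>

struct DemoInfo {
  const char *Name;          // produced by #NameBase
  unsigned BuiltinID;        // produced by token pasting ##NameBase
  unsigned LLVMIntrinsic;    // 0 for entries without a direct intrinsic mapping
  unsigned AltLLVMIntrinsic; // always 0 in the SVE/SME tables
  uint64_t TypeModifier;
};

enum DemoBuiltin : unsigned { BI__builtin_demo_demo_foo = 1 };
enum DemoIntrinsic : unsigned { demo_intrinsic_foo = 100 };

#define DEMO_MAP1(NameBase, IntrinsicID, TypeModifier)                         \
  { #NameBase, BI__builtin_demo_##NameBase, IntrinsicID, 0, TypeModifier }

static const DemoInfo DemoMap[] = {
    DEMO_MAP1(demo_foo, demo_intrinsic_foo, /*TypeModifier=*/0),
};
#undef DEMO_MAP1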
7508
7509#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7510 { \
7511 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7512 TypeModifier \
7513 }
7514
7515#define SMEMAP2(NameBase, TypeModifier) \
7516 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7517static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7518#define GET_SME_LLVM_INTRINSIC_MAP
7519#include "clang/Basic/arm_sme_builtin_cg.inc"
7520#undef GET_SME_LLVM_INTRINSIC_MAP
7521};
7522
7523#undef SMEMAP1
7524#undef SMEMAP2
7525
7526static bool NEONSIMDIntrinsicsProvenSorted = false;
7527
7528static bool AArch64SIMDIntrinsicsProvenSorted = false;
7529static bool AArch64SISDIntrinsicsProvenSorted = false;
7530static bool AArch64SVEIntrinsicsProvenSorted = false;
7531static bool AArch64SMEIntrinsicsProvenSorted = false;
7532
7533static const ARMVectorIntrinsicInfo *
7534findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7535 unsigned BuiltinID, bool &MapProvenSorted) {
7536
7537#ifndef NDEBUG
7538 if (!MapProvenSorted) {
7539 assert(llvm::is_sorted(IntrinsicMap));
7540 MapProvenSorted = true;
7541 }
7542#endif
7543
7544 const ARMVectorIntrinsicInfo *Builtin =
7545 llvm::lower_bound(IntrinsicMap, BuiltinID);
7546
7547 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7548 return Builtin;
7549
7550 return nullptr;
7551}
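findARMVectorIntrinsicInMap depends on the table being sorted by builtin ID so that llvm::lower_bound can binary-search it, and the one-time assert checks that precondition in debug builds only. Here is a standalone model of the same lookup using the standard library; the Info record and findInMap are hypothetical stand-ins.

#include <algorithm>
#include <cassert>
#include <vector>

struct Info {
  unsigned BuiltinID;
  unsigned LLVMIntrinsic;
};

// lower_bound compares an Info entry against a bare builtin ID, mirroring how
// ARMVectorIntrinsicInfo is ordered by BuiltinID; is_sorted needs Info < Info.
static bool operator<(const Info &L, unsigned R) { return L.BuiltinID < R; }
static bool operator<(const Info &L, const Info &R) {
  return L.BuiltinID < R.BuiltinID;
}

static const Info *findInMap(const std::vector<Info> &Map, unsigned BuiltinID) {
  assert(std::is_sorted(Map.begin(), Map.end()) && "map must be sorted by ID");
  auto It = std::lower_bound(Map.begin(), Map.end(), BuiltinID);
  if (It != Map.end() && It->BuiltinID == BuiltinID)
    return &*It;   // Exact hit: this entry drives codegen for the builtin.
  return nullptr;  // Unknown builtin: the caller falls back to other handling.
}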
7552
7553Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7554 unsigned Modifier,
7555 llvm::Type *ArgType,
7556 const CallExpr *E) {
7557 int VectorSize = 0;
7558 if (Modifier & Use64BitVectors)
7559 VectorSize = 64;
7560 else if (Modifier & Use128BitVectors)
7561 VectorSize = 128;
7562
7563 // Return type.
7564 SmallVector<llvm::Type *, 3> Tys;
7565 if (Modifier & AddRetType) {
7566 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7567 if (Modifier & VectorizeRetType)
7568 Ty = llvm::FixedVectorType::get(
7569 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7570
7571 Tys.push_back(Ty);
7572 }
7573
7574 // Arguments.
7575 if (Modifier & VectorizeArgTypes) {
7576 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7577 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7578 }
7579
7580 if (Modifier & (Add1ArgType | Add2ArgTypes))
7581 Tys.push_back(ArgType);
7582
7583 if (Modifier & Add2ArgTypes)
7584 Tys.push_back(ArgType);
7585
7586 if (Modifier & InventFloatType)
7587 Tys.push_back(FloatTy);
7588
7589 return CGM.getIntrinsic(IntrinsicID, Tys);
7590}
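Put differently, the TypeModifier flags decide which types parameterize the overloaded intrinsic: optionally the call's return type, one or two copies of the argument type (re-vectorized to 64 or 128 bits when requested), and optionally an invented float type. The toy model below shows just that flag composition; all names are hypothetical and the strings merely stand in for llvm::Type values.

#include <cstdint>
#include <string>
#include <vector>

enum : uint64_t {
  DemoAddRetType = 1u << 0,
  DemoAdd1ArgType = 1u << 1,
  DemoAdd2ArgTypes = 1u << 2,
  DemoInventFloatType = 1u << 3,
};

// Collect the overload types in the same order as the code above: return type
// first, then the (possibly duplicated) argument type, then an invented float.
static std::vector<std::string> demoOverloadTypes(uint64_t Modifier,
                                                  const std::string &RetTy,
                                                  const std::string &ArgTy) {
  std::vector<std::string> Tys;
  if (Modifier & DemoAddRetType)
    Tys.push_back(RetTy);
  if (Modifier & (DemoAdd1ArgType | DemoAdd2ArgTypes))
    Tys.push_back(ArgTy);
  if (Modifier & DemoAdd2ArgTypes)
    Tys.push_back(ArgTy); // second copy for two-argument overloads
  if (Modifier & DemoInventFloatType)
    Tys.push_back("float");
  return Tys;
}

For example, Modifier = DemoAddRetType | DemoAdd1ArgType yields {RetTy, ArgTy}, matching how table entries tagged AddRetType | Add1ArgType attach both the return type and the first argument type to the intrinsic overload.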
7591
7592static Value *EmitCommonNeonSISDBuiltinExpr(
7593 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7594 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7595 unsigned BuiltinID = SISDInfo.BuiltinID;
7596 unsigned int Int = SISDInfo.LLVMIntrinsic;
7597 unsigned Modifier = SISDInfo.TypeModifier;
7598 const char *s = SISDInfo.NameHint;
7599
7600 switch (BuiltinID) {
7601 case NEON::BI__builtin_neon_vcled_s64:
7602 case NEON::BI__builtin_neon_vcled_u64:
7603 case NEON::BI__builtin_neon_vcles_f32:
7604 case NEON::BI__builtin_neon_vcled_f64:
7605 case NEON::BI__builtin_neon_vcltd_s64:
7606 case NEON::BI__builtin_neon_vcltd_u64:
7607 case NEON::BI__builtin_neon_vclts_f32:
7608 case NEON::BI__builtin_neon_vcltd_f64:
7609 case NEON::BI__builtin_neon_vcales_f32:
7610 case NEON::BI__builtin_neon_vcaled_f64:
7611 case NEON::BI__builtin_neon_vcalts_f32:
7612 case NEON::BI__builtin_neon_vcaltd_f64:
7613 // Only one direction of comparisons actually exists: cmle is a cmge with
7614 // swapped operands. The table gives us the right intrinsic, but we still
7615 // need to do the swap.
7616 std::swap(Ops[0], Ops[1]);
7617 break;
7618 }
7619
7620 assert(Int && "Generic code assumes a valid intrinsic");
7621
7622 // Determine the type(s) of this overloaded AArch64 intrinsic.
7623 const Expr *Arg = E->getArg(0);
7624 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7625 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7626
7627 int j = 0;
7628 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7629 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7630 ai != ae; ++ai, ++j) {
7631 llvm::Type *ArgTy = ai->getType();
7632 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7633 ArgTy->getPrimitiveSizeInBits())
7634 continue;
7635
7636 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7637 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7638 // it before inserting.
7639 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7640 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7641 Ops[j] =
7642 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7643 }
7644
7645 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7646 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7647 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7648 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7649 return CGF.Builder.CreateExtractElement(Result, C0);
7650
7651 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7652}
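At the IR level the SISD helper above widens each mismatched scalar operand into lane 0 of a vector (insertelement into poison), calls the vector intrinsic, and then either extracts lane 0 or bitcasts the result back to the scalar type. The plain C++ model below mirrors that round trip; the names and the lane-wise stand-in operation are hypothetical.

#include <array>
#include <cstddef>
#include <cstdint>

using DemoVec = std::array<int16_t, 4>; // stands in for a 4 x i16 NEON vector

// Stand-in for a lane-wise vector intrinsic (here: plain element-wise add).
static DemoVec demoVectorOp(const DemoVec &A, const DemoVec &B) {
  DemoVec R{};
  for (std::size_t I = 0; I < R.size(); ++I)
    R[I] = static_cast<int16_t>(A[I] + B[I]);
  return R;
}

// Scalar operation expressed through the vector form, like the SISD path:
// place each scalar in lane 0, run the vector op, read lane 0 back out.
static int16_t demoScalarViaVector(int16_t A, int16_t B) {
  DemoVec VA{}, VB{}; // zero-filled here; the real code uses poison lanes
  VA[0] = A;          // "insertelement poison, A, i64 0"
  VB[0] = B;          // "insertelement poison, B, i64 0"
  DemoVec VR = demoVectorOp(VA, VB);
  return VR[0];       // "extractelement %vr, i64 0"
}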
7653
7654Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7655 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7656 const char *NameHint, unsigned Modifier, const CallExpr *E,
7657 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7658 llvm::Triple::ArchType Arch) {
7659 // Get the last argument, which specifies the vector type.
7660 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7661 std::optional<llvm::APSInt> NeonTypeConst =
7662 Arg->getIntegerConstantExpr(getContext());
7663 if (!NeonTypeConst)
7664 return nullptr;
7665
7666 // Determine the type of this overloaded NEON intrinsic.
7667 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7668 bool Usgn = Type.isUnsigned();
7669 bool Quad = Type.isQuad();
7670 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7671 const bool AllowBFloatArgsAndRet =
7672 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7673
7674 llvm::FixedVectorType *VTy =
7675 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7676 llvm::Type *Ty = VTy;
7677 if (!Ty)
7678 return nullptr;
7679
7680 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7681 return Builder.getInt32(addr.getAlignment().getQuantity());
7682 };
7683
7684 unsigned Int = LLVMIntrinsic;
7685 if ((Modifier & UnsignedAlts) && !Usgn)
7686 Int = AltLLVMIntrinsic;
7687
7688 switch (BuiltinID) {
7689 default: break;
7690 case NEON::BI__builtin_neon_splat_lane_v:
7691 case NEON::BI__builtin_neon_splat_laneq_v:
7692 case NEON::BI__builtin_neon_splatq_lane_v:
7693 case NEON::BI__builtin_neon_splatq_laneq_v: {
7694 auto NumElements = VTy->getElementCount();
7695 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7696 NumElements = NumElements * 2;
7697 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7698 NumElements = NumElements.divideCoefficientBy(2);
7699
7700 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7701 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7702 }
7703 case NEON::BI__builtin_neon_vpadd_v:
7704 case NEON::BI__builtin_neon_vpaddq_v:
7705 // We don't allow fp/int overloading of intrinsics.
7706 if (VTy->getElementType()->isFloatingPointTy() &&
7707 Int == Intrinsic::aarch64_neon_addp)
7708 Int = Intrinsic::aarch64_neon_faddp;
7709 break;
7710 case NEON::BI__builtin_neon_vabs_v:
7711 case NEON::BI__builtin_neon_vabsq_v:
7712 if (VTy->getElementType()->isFloatingPointTy())
7713 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7714 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7715 case NEON::BI__builtin_neon_vadd_v:
7716 case NEON::BI__builtin_neon_vaddq_v: {
7717 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7718 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7719 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7720 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7721 return Builder.CreateBitCast(Ops[0], Ty);
7722 }
7723 case NEON::BI__builtin_neon_vaddhn_v: {
7724 llvm::FixedVectorType *SrcTy =
7725 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7726
7727 // %sum = add <4 x i32> %lhs, %rhs
7728 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7729 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7730 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7731
7732 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7733 Constant *ShiftAmt =
7734 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7735 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7736
7737 // %res = trunc <4 x i32> %high to <4 x i16>
7738 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7739 }
7740 case NEON::BI__builtin_neon_vcale_v:
7741 case NEON::BI__builtin_neon_vcaleq_v:
7742 case NEON::BI__builtin_neon_vcalt_v:
7743 case NEON::BI__builtin_neon_vcaltq_v:
7744 std::swap(Ops[0], Ops[1]);
7745 [[fallthrough]];
7746 case NEON::BI__builtin_neon_vcage_v:
7747 case NEON::BI__builtin_neon_vcageq_v:
7748 case NEON::BI__builtin_neon_vcagt_v:
7749 case NEON::BI__builtin_neon_vcagtq_v: {
7750 llvm::Type *Ty;
7751 switch (VTy->getScalarSizeInBits()) {
7752 default: llvm_unreachable("unexpected type");
7753 case 32:
7754 Ty = FloatTy;
7755 break;
7756 case 64:
7757 Ty = DoubleTy;
7758 break;
7759 case 16:
7760 Ty = HalfTy;
7761 break;
7762 }
7763 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7764 llvm::Type *Tys[] = { VTy, VecFlt };
7765 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7766 return EmitNeonCall(F, Ops, NameHint);
7767 }
7768 case NEON::BI__builtin_neon_vceqz_v:
7769 case NEON::BI__builtin_neon_vceqzq_v:
7770 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7771 ICmpInst::ICMP_EQ, "vceqz");
7772 case NEON::BI__builtin_neon_vcgez_v:
7773 case NEON::BI__builtin_neon_vcgezq_v:
7774 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7775 ICmpInst::ICMP_SGE, "vcgez");
7776 case NEON::BI__builtin_neon_vclez_v:
7777 case NEON::BI__builtin_neon_vclezq_v:
7778 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7779 ICmpInst::ICMP_SLE, "vclez");
7780 case NEON::BI__builtin_neon_vcgtz_v:
7781 case NEON::BI__builtin_neon_vcgtzq_v:
7782 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7783 ICmpInst::ICMP_SGT, "vcgtz");
7784 case NEON::BI__builtin_neon_vcltz_v:
7785 case NEON::BI__builtin_neon_vcltzq_v:
7786 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7787 ICmpInst::ICMP_SLT, "vcltz");
7788 case NEON::BI__builtin_neon_vclz_v:
7789 case NEON::BI__builtin_neon_vclzq_v:
7790 // We generate a target-independent intrinsic, which needs a second argument
7791 // for whether or not clz of zero is undefined; on ARM it isn't.
7792 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7793 break;
7794 case NEON::BI__builtin_neon_vcvt_f32_v:
7795 case NEON::BI__builtin_neon_vcvtq_f32_v:
7796 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7797 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7798 HasLegalHalfType);
7799 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7800 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7801 case NEON::BI__builtin_neon_vcvt_f16_s16:
7802 case NEON::BI__builtin_neon_vcvt_f16_u16:
7803 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7804 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7805 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7806 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7807 HasLegalHalfType);
7808 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7809 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7810 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7811 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7812 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7813 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7814 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7815 Function *F = CGM.getIntrinsic(Int, Tys);
7816 return EmitNeonCall(F, Ops, "vcvt_n");
7817 }
7818 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7819 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7820 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7821 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7822 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7823 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7824 Function *F = CGM.getIntrinsic(Int, Tys);
7825 return EmitNeonCall(F, Ops, "vcvt_n");
7826 }
7827 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7828 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7829 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7830 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7831 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7832 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7833 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7834 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7835 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7836 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7837 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7838 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7839 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7840 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7841 return EmitNeonCall(F, Ops, "vcvt_n");
7842 }
7843 case NEON::BI__builtin_neon_vcvt_s32_v:
7844 case NEON::BI__builtin_neon_vcvt_u32_v:
7845 case NEON::BI__builtin_neon_vcvt_s64_v:
7846 case NEON::BI__builtin_neon_vcvt_u64_v:
7847 case NEON::BI__builtin_neon_vcvt_s16_f16:
7848 case NEON::BI__builtin_neon_vcvt_u16_f16:
7849 case NEON::BI__builtin_neon_vcvtq_s32_v:
7850 case NEON::BI__builtin_neon_vcvtq_u32_v:
7851 case NEON::BI__builtin_neon_vcvtq_s64_v:
7852 case NEON::BI__builtin_neon_vcvtq_u64_v:
7853 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7854 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7855 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7856 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7857 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7858 }
7859 case NEON::BI__builtin_neon_vcvta_s16_f16:
7860 case NEON::BI__builtin_neon_vcvta_s32_v:
7861 case NEON::BI__builtin_neon_vcvta_s64_v:
7862 case NEON::BI__builtin_neon_vcvta_u16_f16:
7863 case NEON::BI__builtin_neon_vcvta_u32_v:
7864 case NEON::BI__builtin_neon_vcvta_u64_v:
7865 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7866 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7867 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7868 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7869 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7870 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7871 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7872 case NEON::BI__builtin_neon_vcvtn_s32_v:
7873 case NEON::BI__builtin_neon_vcvtn_s64_v:
7874 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7875 case NEON::BI__builtin_neon_vcvtn_u32_v:
7876 case NEON::BI__builtin_neon_vcvtn_u64_v:
7877 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7878 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7879 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7880 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7881 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7882 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7883 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7884 case NEON::BI__builtin_neon_vcvtp_s32_v:
7885 case NEON::BI__builtin_neon_vcvtp_s64_v:
7886 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7887 case NEON::BI__builtin_neon_vcvtp_u32_v:
7888 case NEON::BI__builtin_neon_vcvtp_u64_v:
7889 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7890 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7891 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7892 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7893 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7894 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7895 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7896 case NEON::BI__builtin_neon_vcvtm_s32_v:
7897 case NEON::BI__builtin_neon_vcvtm_s64_v:
7898 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7899 case NEON::BI__builtin_neon_vcvtm_u32_v:
7900 case NEON::BI__builtin_neon_vcvtm_u64_v:
7901 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7902 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7903 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7904 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7905 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7906 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7907 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7908 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7909 }
7910 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7911 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7912 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7913
7914 }
7915 case NEON::BI__builtin_neon_vext_v:
7916 case NEON::BI__builtin_neon_vextq_v: {
7917 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7918 SmallVector<int, 16> Indices;
7919 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7920 Indices.push_back(i+CV);
7921
7922 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7923 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7924 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7925 }
7926 case NEON::BI__builtin_neon_vfma_v:
7927 case NEON::BI__builtin_neon_vfmaq_v: {
7928 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7929 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7930 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7931
7932 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7933 return emitCallMaybeConstrainedFPBuiltin(
7934 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7935 {Ops[1], Ops[2], Ops[0]});
7936 }
7937 case NEON::BI__builtin_neon_vld1_v:
7938 case NEON::BI__builtin_neon_vld1q_v: {
7939 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7940 Ops.push_back(getAlignmentValue32(PtrOp0));
7941 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7942 }
7943 case NEON::BI__builtin_neon_vld1_x2_v:
7944 case NEON::BI__builtin_neon_vld1q_x2_v:
7945 case NEON::BI__builtin_neon_vld1_x3_v:
7946 case NEON::BI__builtin_neon_vld1q_x3_v:
7947 case NEON::BI__builtin_neon_vld1_x4_v:
7948 case NEON::BI__builtin_neon_vld1q_x4_v: {
7949 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7950 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7951 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7952 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7953 }
7954 case NEON::BI__builtin_neon_vld2_v:
7955 case NEON::BI__builtin_neon_vld2q_v:
7956 case NEON::BI__builtin_neon_vld3_v:
7957 case NEON::BI__builtin_neon_vld3q_v:
7958 case NEON::BI__builtin_neon_vld4_v:
7959 case NEON::BI__builtin_neon_vld4q_v:
7960 case NEON::BI__builtin_neon_vld2_dup_v:
7961 case NEON::BI__builtin_neon_vld2q_dup_v:
7962 case NEON::BI__builtin_neon_vld3_dup_v:
7963 case NEON::BI__builtin_neon_vld3q_dup_v:
7964 case NEON::BI__builtin_neon_vld4_dup_v:
7965 case NEON::BI__builtin_neon_vld4q_dup_v: {
7966 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7967 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7968 Value *Align = getAlignmentValue32(PtrOp1);
7969 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7970 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7971 }
7972 case NEON::BI__builtin_neon_vld1_dup_v:
7973 case NEON::BI__builtin_neon_vld1q_dup_v: {
7974 Value *V = PoisonValue::get(Ty);
7975 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7976 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7977 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7978 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7979 return EmitNeonSplat(Ops[0], CI);
7980 }
7981 case NEON::BI__builtin_neon_vld2_lane_v:
7982 case NEON::BI__builtin_neon_vld2q_lane_v:
7983 case NEON::BI__builtin_neon_vld3_lane_v:
7984 case NEON::BI__builtin_neon_vld3q_lane_v:
7985 case NEON::BI__builtin_neon_vld4_lane_v:
7986 case NEON::BI__builtin_neon_vld4q_lane_v: {
7987 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7988 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7989 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7990 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7991 Ops.push_back(getAlignmentValue32(PtrOp1));
7992 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7993 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7994 }
7995 case NEON::BI__builtin_neon_vmovl_v: {
7996 llvm::FixedVectorType *DTy =
7997 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7998 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7999 if (Usgn)
8000 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8001 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8002 }
8003 case NEON::BI__builtin_neon_vmovn_v: {
8004 llvm::FixedVectorType *QTy =
8005 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8006 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8007 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8008 }
8009 case NEON::BI__builtin_neon_vmull_v:
8010 // FIXME: the integer vmull operations could be emitted in terms of pure
8011 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8012 // hoisting the exts outside loops. Until global ISel comes along that can
8013 // see through such movement this leads to bad CodeGen. So we need an
8014 // intrinsic for now.
8015 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8016 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8017 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8018 case NEON::BI__builtin_neon_vpadal_v:
8019 case NEON::BI__builtin_neon_vpadalq_v: {
8020 // The source operand type has twice as many elements of half the size.
8021 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8022 llvm::Type *EltTy =
8023 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8024 auto *NarrowTy =
8025 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8026 llvm::Type *Tys[2] = { Ty, NarrowTy };
8027 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8028 }
8029 case NEON::BI__builtin_neon_vpaddl_v:
8030 case NEON::BI__builtin_neon_vpaddlq_v: {
8031 // The source operand type has twice as many elements of half the size.
8032 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8033 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8034 auto *NarrowTy =
8035 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8036 llvm::Type *Tys[2] = { Ty, NarrowTy };
8037 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8038 }
8039 case NEON::BI__builtin_neon_vqdmlal_v:
8040 case NEON::BI__builtin_neon_vqdmlsl_v: {
8041 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8042 Ops[1] =
8043 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8044 Ops.resize(2);
8045 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8046 }
8047 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8048 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8049 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8050 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8051 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8052 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8053 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8054 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8055 RTy->getNumElements() * 2);
8056 llvm::Type *Tys[2] = {
8057 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8058 /*isQuad*/ false))};
8059 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8060 }
8061 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8062 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8063 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8064 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8065 llvm::Type *Tys[2] = {
8066 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8067 /*isQuad*/ true))};
8068 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8069 }
8070 case NEON::BI__builtin_neon_vqshl_n_v:
8071 case NEON::BI__builtin_neon_vqshlq_n_v:
8072 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8073 1, false);
8074 case NEON::BI__builtin_neon_vqshlu_n_v:
8075 case NEON::BI__builtin_neon_vqshluq_n_v:
8076 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8077 1, false);
8078 case NEON::BI__builtin_neon_vrecpe_v:
8079 case NEON::BI__builtin_neon_vrecpeq_v:
8080 case NEON::BI__builtin_neon_vrsqrte_v:
8081 case NEON::BI__builtin_neon_vrsqrteq_v:
8082 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8083 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8084 case NEON::BI__builtin_neon_vrndi_v:
8085 case NEON::BI__builtin_neon_vrndiq_v:
8086 Int = Builder.getIsFPConstrained()
8087 ? Intrinsic::experimental_constrained_nearbyint
8088 : Intrinsic::nearbyint;
8089 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8090 case NEON::BI__builtin_neon_vrshr_n_v:
8091 case NEON::BI__builtin_neon_vrshrq_n_v:
8092 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8093 1, true);
8094 case NEON::BI__builtin_neon_vsha512hq_u64:
8095 case NEON::BI__builtin_neon_vsha512h2q_u64:
8096 case NEON::BI__builtin_neon_vsha512su0q_u64:
8097 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8098 Function *F = CGM.getIntrinsic(Int);
8099 return EmitNeonCall(F, Ops, "");
8100 }
8101 case NEON::BI__builtin_neon_vshl_n_v:
8102 case NEON::BI__builtin_neon_vshlq_n_v:
8103 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8104 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8105 "vshl_n");
8106 case NEON::BI__builtin_neon_vshll_n_v: {
8107 llvm::FixedVectorType *SrcTy =
8108 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8109 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8110 if (Usgn)
8111 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8112 else
8113 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8114 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8115 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8116 }
8117 case NEON::BI__builtin_neon_vshrn_n_v: {
8118 llvm::FixedVectorType *SrcTy =
8119 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8120 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8121 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8122 if (Usgn)
8123 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8124 else
8125 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8126 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8127 }
8128 case NEON::BI__builtin_neon_vshr_n_v:
8129 case NEON::BI__builtin_neon_vshrq_n_v:
8130 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8131 case NEON::BI__builtin_neon_vst1_v:
8132 case NEON::BI__builtin_neon_vst1q_v:
8133 case NEON::BI__builtin_neon_vst2_v:
8134 case NEON::BI__builtin_neon_vst2q_v:
8135 case NEON::BI__builtin_neon_vst3_v:
8136 case NEON::BI__builtin_neon_vst3q_v:
8137 case NEON::BI__builtin_neon_vst4_v:
8138 case NEON::BI__builtin_neon_vst4q_v:
8139 case NEON::BI__builtin_neon_vst2_lane_v:
8140 case NEON::BI__builtin_neon_vst2q_lane_v:
8141 case NEON::BI__builtin_neon_vst3_lane_v:
8142 case NEON::BI__builtin_neon_vst3q_lane_v:
8143 case NEON::BI__builtin_neon_vst4_lane_v:
8144 case NEON::BI__builtin_neon_vst4q_lane_v: {
8145 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8146 Ops.push_back(getAlignmentValue32(PtrOp0));
8147 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8148 }
8149 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8150 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8151 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8152 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8153 case NEON::BI__builtin_neon_vsm4eq_u32: {
8154 Function *F = CGM.getIntrinsic(Int);
8155 return EmitNeonCall(F, Ops, "");
8156 }
8157 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8158 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8159 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8160 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8161 Function *F = CGM.getIntrinsic(Int);
8162 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8163 return EmitNeonCall(F, Ops, "");
8164 }
8165 case NEON::BI__builtin_neon_vst1_x2_v:
8166 case NEON::BI__builtin_neon_vst1q_x2_v:
8167 case NEON::BI__builtin_neon_vst1_x3_v:
8168 case NEON::BI__builtin_neon_vst1q_x3_v:
8169 case NEON::BI__builtin_neon_vst1_x4_v:
8170 case NEON::BI__builtin_neon_vst1q_x4_v: {
8171 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8172 // in AArch64 it comes last. We may want to stick to one or the other.
8173 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8174 Arch == llvm::Triple::aarch64_32) {
8175 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8176 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8177 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8178 }
8179 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8180 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8181 }
8182 case NEON::BI__builtin_neon_vsubhn_v: {
8183 llvm::FixedVectorType *SrcTy =
8184 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8185
8186    // %diff = sub <4 x i32> %lhs, %rhs
8187 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8188 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8189 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8190
8191    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8192 Constant *ShiftAmt =
8193 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8194 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8195
8196 // %res = trunc <4 x i32> %high to <4 x i16>
8197 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8198 }
8199 case NEON::BI__builtin_neon_vtrn_v:
8200 case NEON::BI__builtin_neon_vtrnq_v: {
8201 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8202 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8203 Value *SV = nullptr;
8204
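    // With a 4-element vector, for example, the two loop iterations below
    // build the shuffle masks <0,4,2,6> and <1,5,3,7>, storing the two
    // transposed results through the pointer in Ops[0].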
8205 for (unsigned vi = 0; vi != 2; ++vi) {
8206 SmallVector<int, 16> Indices;
8207 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8208 Indices.push_back(i+vi);
8209 Indices.push_back(i+e+vi);
8210 }
8211 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8212 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8213 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8214 }
8215 return SV;
8216 }
8217 case NEON::BI__builtin_neon_vtst_v:
8218 case NEON::BI__builtin_neon_vtstq_v: {
8219 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8220 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8221 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8222 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8223 ConstantAggregateZero::get(Ty));
8224 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8225 }
8226 case NEON::BI__builtin_neon_vuzp_v:
8227 case NEON::BI__builtin_neon_vuzpq_v: {
8228 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8229 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8230 Value *SV = nullptr;
8231
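    // With a 4-element vector, for example, the masks are <0,2,4,6> (the even
    // lanes of the concatenated inputs) and <1,3,5,7> (the odd lanes).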
8232 for (unsigned vi = 0; vi != 2; ++vi) {
8233 SmallVector<int, 16> Indices;
8234 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8235 Indices.push_back(2*i+vi);
8236
8237 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8238 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8239 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8240 }
8241 return SV;
8242 }
8243 case NEON::BI__builtin_neon_vxarq_u64: {
8244 Function *F = CGM.getIntrinsic(Int);
8245 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8246 return EmitNeonCall(F, Ops, "");
8247 }
8248 case NEON::BI__builtin_neon_vzip_v:
8249 case NEON::BI__builtin_neon_vzipq_v: {
8250 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8251 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8252 Value *SV = nullptr;
8253
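    // With a 4-element vector, for example, the masks are <0,4,1,5> and
    // <2,6,3,7>, interleaving first the low and then the high halves of the
    // two inputs.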
8254 for (unsigned vi = 0; vi != 2; ++vi) {
8255 SmallVector<int, 16> Indices;
8256 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8257 Indices.push_back((i + vi*e) >> 1);
8258 Indices.push_back(((i + vi*e) >> 1)+e);
8259 }
8260 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8261 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8262 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8263 }
8264 return SV;
8265 }
8266 case NEON::BI__builtin_neon_vdot_s32:
8267 case NEON::BI__builtin_neon_vdot_u32:
8268 case NEON::BI__builtin_neon_vdotq_s32:
8269 case NEON::BI__builtin_neon_vdotq_u32: {
8270 auto *InputTy =
8271 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8272 llvm::Type *Tys[2] = { Ty, InputTy };
8273 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8274 }
8275 case NEON::BI__builtin_neon_vfmlal_low_f16:
8276 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8277 auto *InputTy =
8278 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8279 llvm::Type *Tys[2] = { Ty, InputTy };
8280 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8281 }
8282 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8283 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8284 auto *InputTy =
8285 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8286 llvm::Type *Tys[2] = { Ty, InputTy };
8287 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8288 }
8289 case NEON::BI__builtin_neon_vfmlal_high_f16:
8290 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8291 auto *InputTy =
8292 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8293 llvm::Type *Tys[2] = { Ty, InputTy };
8294 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8295 }
8296 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8297 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8298 auto *InputTy =
8299 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8300 llvm::Type *Tys[2] = { Ty, InputTy };
8301 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8302 }
8303 case NEON::BI__builtin_neon_vmmlaq_s32:
8304 case NEON::BI__builtin_neon_vmmlaq_u32: {
8305 auto *InputTy =
8306 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8307 llvm::Type *Tys[2] = { Ty, InputTy };
8308 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8309 }
8310 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8311 auto *InputTy =
8312 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8313 llvm::Type *Tys[2] = { Ty, InputTy };
8314 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8315 }
8316 case NEON::BI__builtin_neon_vusdot_s32:
8317 case NEON::BI__builtin_neon_vusdotq_s32: {
8318 auto *InputTy =
8319 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8320 llvm::Type *Tys[2] = { Ty, InputTy };
8321 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8322 }
8323 case NEON::BI__builtin_neon_vbfdot_f32:
8324 case NEON::BI__builtin_neon_vbfdotq_f32: {
8325 llvm::Type *InputTy =
8326 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8327 llvm::Type *Tys[2] = { Ty, InputTy };
8328 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8329 }
8330 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8331 llvm::Type *Tys[1] = { Ty };
8332 Function *F = CGM.getIntrinsic(Int, Tys);
8333 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8334 }
8335
8336 }
8337
8338 assert(Int && "Expected valid intrinsic number");
8339
8340 // Determine the type(s) of this overloaded AArch64 intrinsic.
8341 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8342
8343 Value *Result = EmitNeonCall(F, Ops, NameHint);
8344 llvm::Type *ResultType = ConvertType(E->getType());
8345  // Cast the AArch64 intrinsic's one-element vector result back to the
8346  // scalar type expected by the builtin.
8347 return Builder.CreateBitCast(Result, ResultType, NameHint);
8348}
8349
8350Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8351    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8352 const CmpInst::Predicate Ip, const Twine &Name) {
8353 llvm::Type *OTy = Op->getType();
8354
8355 // FIXME: this is utterly horrific. We should not be looking at previous
8356 // codegen context to find out what needs doing. Unfortunately TableGen
8357 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8358 // (etc).
8359 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8360 OTy = BI->getOperand(0)->getType();
8361
8362 Op = Builder.CreateBitCast(Op, OTy);
8363 if (OTy->getScalarType()->isFloatingPointTy()) {
8364 if (Fp == CmpInst::FCMP_OEQ)
8365 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8366 else
8367 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8368 } else {
8369 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8370 }
8371 return Builder.CreateSExt(Op, Ty, Name);
8372}
8373
8374static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8375                                 Value *ExtOp, Value *IndexOp,
8376 llvm::Type *ResTy, unsigned IntID,
8377 const char *Name) {
8378  SmallVector<Value *, 2> TblOps;
8379  if (ExtOp)
8380 TblOps.push_back(ExtOp);
8381
8382  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8383 SmallVector<int, 16> Indices;
8384 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8385 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8386 Indices.push_back(2*i);
8387 Indices.push_back(2*i+1);
8388 }
8389
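  // Shuffling two adjacent 64-bit table vectors with this mask simply
  // concatenates them into a single 128-bit table register.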
8390 int PairPos = 0, End = Ops.size() - 1;
8391 while (PairPos < End) {
8392 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8393 Ops[PairPos+1], Indices,
8394 Name));
8395 PairPos += 2;
8396 }
8397
8398  // If there's an odd number of 64-bit lookup-table vectors, fill the high
8399  // 64 bits of the final 128-bit lookup table with zeros.
8400 if (PairPos == End) {
8401 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8402 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8403 ZeroTbl, Indices, Name));
8404 }
8405
8406 Function *TblF;
8407 TblOps.push_back(IndexOp);
8408 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8409
8410 return CGF.EmitNeonCall(TblF, TblOps, Name);
8411}
8412
8413Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8414 unsigned Value;
8415 switch (BuiltinID) {
8416 default:
8417 return nullptr;
8418 case clang::ARM::BI__builtin_arm_nop:
8419 Value = 0;
8420 break;
8421 case clang::ARM::BI__builtin_arm_yield:
8422 case clang::ARM::BI__yield:
8423 Value = 1;
8424 break;
8425 case clang::ARM::BI__builtin_arm_wfe:
8426 case clang::ARM::BI__wfe:
8427 Value = 2;
8428 break;
8429 case clang::ARM::BI__builtin_arm_wfi:
8430 case clang::ARM::BI__wfi:
8431 Value = 3;
8432 break;
8433 case clang::ARM::BI__builtin_arm_sev:
8434 case clang::ARM::BI__sev:
8435 Value = 4;
8436 break;
8437 case clang::ARM::BI__builtin_arm_sevl:
8438 case clang::ARM::BI__sevl:
8439 Value = 5;
8440 break;
8441 }
8442
8443 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8444 llvm::ConstantInt::get(Int32Ty, Value));
8445}
8446
8447enum SpecialRegisterAccessKind {
8448  NormalRead,
8449  VolatileRead,
8450  Write,
8451};
8452
8453// Generates the IR for __builtin_read_exec_*.
8454// Lowers the builtin to the amdgcn_ballot intrinsic.
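// A ballot of the constant 'true' returns a mask of the currently active
// lanes, i.e. the exec mask; for the *_hi variant the upper 32 bits are then
// extracted with an lshr/trunc pair.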
8455static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8456                                      llvm::Type *RegisterType,
8457 llvm::Type *ValueType, bool isExecHi) {
8458 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8459 CodeGen::CodeGenModule &CGM = CGF.CGM;
8460
8461 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8462 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8463
8464 if (isExecHi) {
8465 Value *Rt2 = Builder.CreateLShr(Call, 32);
8466 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8467 return Rt2;
8468 }
8469
8470 return Call;
8471}
8472
8473// Generates the IR for the read/write special register builtin.
8474// ValueType is the type of the value that is to be written or read;
8475// RegisterType is the type of the register being written to or read from.
8476static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8477                                         const CallExpr *E,
8478 llvm::Type *RegisterType,
8479 llvm::Type *ValueType,
8480 SpecialRegisterAccessKind AccessKind,
8481 StringRef SysReg = "") {
8482  // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8483 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8484 RegisterType->isIntegerTy(128)) &&
8485 "Unsupported size for register.");
8486
8487 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8488 CodeGen::CodeGenModule &CGM = CGF.CGM;
8489 LLVMContext &Context = CGM.getLLVMContext();
8490
8491 if (SysReg.empty()) {
8492 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8493 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8494 }
8495
8496 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8497 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8498 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8499
8500 llvm::Type *Types[] = { RegisterType };
8501
8502 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8503 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8504 && "Can't fit 64-bit value in 32-bit register");
8505
8506 if (AccessKind != Write) {
8507 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8508 llvm::Function *F = CGM.getIntrinsic(
8509 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8510 : llvm::Intrinsic::read_register,
8511 Types);
8512 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8513
8514 if (MixedTypes)
8515 // Read into 64 bit register and then truncate result to 32 bit.
8516 return Builder.CreateTrunc(Call, ValueType);
8517
8518 if (ValueType->isPointerTy())
8519 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8520 return Builder.CreateIntToPtr(Call, ValueType);
8521
8522 return Call;
8523 }
8524
8525 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8526 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8527 if (MixedTypes) {
8528 // Extend 32 bit write value to 64 bit to pass to write.
8529 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8530 return Builder.CreateCall(F, { Metadata, ArgValue });
8531 }
8532
8533 if (ValueType->isPointerTy()) {
8534 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8535 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8536 return Builder.CreateCall(F, { Metadata, ArgValue });
8537 }
8538
8539 return Builder.CreateCall(F, { Metadata, ArgValue });
8540}
8541
8542/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8543/// argument that specifies the vector type.
8544static bool HasExtraNeonArgument(unsigned BuiltinID) {
8545 switch (BuiltinID) {
8546 default: break;
8547 case NEON::BI__builtin_neon_vget_lane_i8:
8548 case NEON::BI__builtin_neon_vget_lane_i16:
8549 case NEON::BI__builtin_neon_vget_lane_bf16:
8550 case NEON::BI__builtin_neon_vget_lane_i32:
8551 case NEON::BI__builtin_neon_vget_lane_i64:
8552 case NEON::BI__builtin_neon_vget_lane_f32:
8553 case NEON::BI__builtin_neon_vgetq_lane_i8:
8554 case NEON::BI__builtin_neon_vgetq_lane_i16:
8555 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8556 case NEON::BI__builtin_neon_vgetq_lane_i32:
8557 case NEON::BI__builtin_neon_vgetq_lane_i64:
8558 case NEON::BI__builtin_neon_vgetq_lane_f32:
8559 case NEON::BI__builtin_neon_vduph_lane_bf16:
8560 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8561 case NEON::BI__builtin_neon_vset_lane_i8:
8562 case NEON::BI__builtin_neon_vset_lane_i16:
8563 case NEON::BI__builtin_neon_vset_lane_bf16:
8564 case NEON::BI__builtin_neon_vset_lane_i32:
8565 case NEON::BI__builtin_neon_vset_lane_i64:
8566 case NEON::BI__builtin_neon_vset_lane_f32:
8567 case NEON::BI__builtin_neon_vsetq_lane_i8:
8568 case NEON::BI__builtin_neon_vsetq_lane_i16:
8569 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8570 case NEON::BI__builtin_neon_vsetq_lane_i32:
8571 case NEON::BI__builtin_neon_vsetq_lane_i64:
8572 case NEON::BI__builtin_neon_vsetq_lane_f32:
8573 case NEON::BI__builtin_neon_vsha1h_u32:
8574 case NEON::BI__builtin_neon_vsha1cq_u32:
8575 case NEON::BI__builtin_neon_vsha1pq_u32:
8576 case NEON::BI__builtin_neon_vsha1mq_u32:
8577 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8578 case clang::ARM::BI_MoveToCoprocessor:
8579 case clang::ARM::BI_MoveToCoprocessor2:
8580 return false;
8581 }
8582 return true;
8583}
8584
8585Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8586 const CallExpr *E,
8587 ReturnValueSlot ReturnValue,
8588 llvm::Triple::ArchType Arch) {
8589 if (auto Hint = GetValueForARMHint(BuiltinID))
8590 return Hint;
8591
8592 if (BuiltinID == clang::ARM::BI__emit) {
8593 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8594 llvm::FunctionType *FTy =
8595 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8596
8597    Expr::EvalResult Result;
8598    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8599 llvm_unreachable("Sema will ensure that the parameter is constant");
8600
8601 llvm::APSInt Value = Result.Val.getInt();
8602 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8603
8604 llvm::InlineAsm *Emit =
8605 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8606 /*hasSideEffects=*/true)
8607 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8608 /*hasSideEffects=*/true);
8609
8610 return Builder.CreateCall(Emit);
8611 }
8612
8613 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8614 Value *Option = EmitScalarExpr(E->getArg(0));
8615 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8616 }
8617
8618 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8619 Value *Address = EmitScalarExpr(E->getArg(0));
8620 Value *RW = EmitScalarExpr(E->getArg(1));
8621 Value *IsData = EmitScalarExpr(E->getArg(2));
8622
8623    // Locality is not supported on the ARM target.
8624 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8625
8626 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8627 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8628 }
8629
8630 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8631 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8632 return Builder.CreateCall(
8633 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8634 }
8635
8636 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8637 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8638 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8639 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8640 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8641 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8642 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8643 return Res;
8644 }
8645
8646
8647 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8648 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8649 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8650 }
8651 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8652 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8653 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8654 "cls");
8655 }
8656
8657 if (BuiltinID == clang::ARM::BI__clear_cache) {
8658 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8659 const FunctionDecl *FD = E->getDirectCallee();
8660 Value *Ops[2];
8661 for (unsigned i = 0; i < 2; i++)
8662 Ops[i] = EmitScalarExpr(E->getArg(i));
8663 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8664 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8665 StringRef Name = FD->getName();
8666 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8667 }
8668
8669 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8670 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8671 Function *F;
8672
8673 switch (BuiltinID) {
8674 default: llvm_unreachable("unexpected builtin");
8675 case clang::ARM::BI__builtin_arm_mcrr:
8676 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8677 break;
8678 case clang::ARM::BI__builtin_arm_mcrr2:
8679 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8680 break;
8681 }
8682
8683 // MCRR{2} instruction has 5 operands but
8684 // the intrinsic has 4 because Rt and Rt2
8685 // are represented as a single unsigned 64
8686 // bit integer in the intrinsic definition
8687 // but internally it's represented as 2 32
8688 // bit integers.
8689
8690 Value *Coproc = EmitScalarExpr(E->getArg(0));
8691 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8692 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8693 Value *CRm = EmitScalarExpr(E->getArg(3));
8694
8695 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8696 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8697 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8698 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8699
8700 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8701 }
8702
8703 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8704 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8705 Function *F;
8706
8707 switch (BuiltinID) {
8708 default: llvm_unreachable("unexpected builtin");
8709 case clang::ARM::BI__builtin_arm_mrrc:
8710 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8711 break;
8712 case clang::ARM::BI__builtin_arm_mrrc2:
8713 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8714 break;
8715 }
8716
8717 Value *Coproc = EmitScalarExpr(E->getArg(0));
8718 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8719 Value *CRm = EmitScalarExpr(E->getArg(2));
8720 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8721
8722 // Returns an unsigned 64 bit integer, represented
8723 // as two 32 bit integers.
8724
8725 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8726 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8727 Rt = Builder.CreateZExt(Rt, Int64Ty);
8728 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8729
8730 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8731 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8732 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8733
8734 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8735 }
8736
8737 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8738 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8739 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8740 getContext().getTypeSize(E->getType()) == 64) ||
8741 BuiltinID == clang::ARM::BI__ldrexd) {
8742 Function *F;
8743
8744 switch (BuiltinID) {
8745 default: llvm_unreachable("unexpected builtin");
8746 case clang::ARM::BI__builtin_arm_ldaex:
8747 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8748 break;
8749 case clang::ARM::BI__builtin_arm_ldrexd:
8750 case clang::ARM::BI__builtin_arm_ldrex:
8751 case clang::ARM::BI__ldrexd:
8752 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8753 break;
8754 }
8755
8756 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8757 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8758
8759 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8760 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8761 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8762 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8763
8764 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8765 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8766 Val = Builder.CreateOr(Val, Val1);
8767 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8768 }
8769
8770 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8771 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8772 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8773
8774 QualType Ty = E->getType();
8775 llvm::Type *RealResTy = ConvertType(Ty);
8776 llvm::Type *IntTy =
8777 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8778
8779    Function *F = CGM.getIntrinsic(
8780        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8781 : Intrinsic::arm_ldrex,
8782 UnqualPtrTy);
8783 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8784 Val->addParamAttr(
8785 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8786
8787 if (RealResTy->isPointerTy())
8788 return Builder.CreateIntToPtr(Val, RealResTy);
8789 else {
8790 llvm::Type *IntResTy = llvm::IntegerType::get(
8791 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8792 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8793 RealResTy);
8794 }
8795 }
8796
8797 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8798 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8799 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8800 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8801    Function *F = CGM.getIntrinsic(
8802        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8803 : Intrinsic::arm_strexd);
8804 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8805
8806 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8807 Value *Val = EmitScalarExpr(E->getArg(0));
8808 Builder.CreateStore(Val, Tmp);
8809
8810 Address LdPtr = Tmp.withElementType(STy);
8811 Val = Builder.CreateLoad(LdPtr);
8812
8813 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8814 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8815 Value *StPtr = EmitScalarExpr(E->getArg(1));
8816 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8817 }
8818
8819 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8820 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8821 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8822 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8823
8824 QualType Ty = E->getArg(0)->getType();
8825 llvm::Type *StoreTy =
8826 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8827
8828 if (StoreVal->getType()->isPointerTy())
8829 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8830 else {
8831 llvm::Type *IntTy = llvm::IntegerType::get(
8832          getLLVMContext(),
8833          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8834 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8835 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8836 }
8837
8838    Function *F = CGM.getIntrinsic(
8839        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8840 : Intrinsic::arm_strex,
8841 StoreAddr->getType());
8842
8843 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8844 CI->addParamAttr(
8845 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8846 return CI;
8847 }
8848
8849 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8850 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8851 return Builder.CreateCall(F);
8852 }
8853
8854 // CRC32
8855 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8856 switch (BuiltinID) {
8857 case clang::ARM::BI__builtin_arm_crc32b:
8858 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8859 case clang::ARM::BI__builtin_arm_crc32cb:
8860 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8861 case clang::ARM::BI__builtin_arm_crc32h:
8862 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8863 case clang::ARM::BI__builtin_arm_crc32ch:
8864 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8865 case clang::ARM::BI__builtin_arm_crc32w:
8866 case clang::ARM::BI__builtin_arm_crc32d:
8867 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8868 case clang::ARM::BI__builtin_arm_crc32cw:
8869 case clang::ARM::BI__builtin_arm_crc32cd:
8870 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8871 }
8872
8873 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8874 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8875 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8876
8877 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8878 // intrinsics, hence we need different codegen for these cases.
8879 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8880 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8881 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8882 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8883 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8884 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8885
8886 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8887 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8888 return Builder.CreateCall(F, {Res, Arg1b});
8889 } else {
8890 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8891
8892 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8893 return Builder.CreateCall(F, {Arg0, Arg1});
8894 }
8895 }
8896
8897 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8898 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8899 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8900 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8901 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8902 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8903
8904 SpecialRegisterAccessKind AccessKind = Write;
8905 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8906 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8907 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8908 AccessKind = VolatileRead;
8909
8910 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8911 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8912
8913 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8914 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8915
8916 llvm::Type *ValueType;
8917 llvm::Type *RegisterType;
8918 if (IsPointerBuiltin) {
8919 ValueType = VoidPtrTy;
8920 RegisterType = Int32Ty;
8921 } else if (Is64Bit) {
8922 ValueType = RegisterType = Int64Ty;
8923 } else {
8924 ValueType = RegisterType = Int32Ty;
8925 }
8926
8927 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8928 AccessKind);
8929 }
8930
8931 if (BuiltinID == ARM::BI__builtin_sponentry) {
8932 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8933 return Builder.CreateCall(F);
8934 }
8935
8936 // Handle MSVC intrinsics before argument evaluation to prevent double
8937 // evaluation.
8938 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8939 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8940
8941 // Deal with MVE builtins
8942 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8943 return Result;
8944 // Handle CDE builtins
8945 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8946 return Result;
8947
8948  // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
8949 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8950 return P.first == BuiltinID;
8951 });
8952 if (It != end(NEONEquivalentIntrinsicMap))
8953 BuiltinID = It->second;
8954
8955 // Find out if any arguments are required to be integer constant
8956 // expressions.
8957 unsigned ICEArguments = 0;
8958  ASTContext::GetBuiltinTypeError Error;
8959  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8960 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8961
8962 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8963 return Builder.getInt32(addr.getAlignment().getQuantity());
8964 };
8965
8966 Address PtrOp0 = Address::invalid();
8967 Address PtrOp1 = Address::invalid();
8968  SmallVector<Value*, 4> Ops;
8969  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8970 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8971 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8972 if (i == 0) {
8973 switch (BuiltinID) {
8974 case NEON::BI__builtin_neon_vld1_v:
8975 case NEON::BI__builtin_neon_vld1q_v:
8976 case NEON::BI__builtin_neon_vld1q_lane_v:
8977 case NEON::BI__builtin_neon_vld1_lane_v:
8978 case NEON::BI__builtin_neon_vld1_dup_v:
8979 case NEON::BI__builtin_neon_vld1q_dup_v:
8980 case NEON::BI__builtin_neon_vst1_v:
8981 case NEON::BI__builtin_neon_vst1q_v:
8982 case NEON::BI__builtin_neon_vst1q_lane_v:
8983 case NEON::BI__builtin_neon_vst1_lane_v:
8984 case NEON::BI__builtin_neon_vst2_v:
8985 case NEON::BI__builtin_neon_vst2q_v:
8986 case NEON::BI__builtin_neon_vst2_lane_v:
8987 case NEON::BI__builtin_neon_vst2q_lane_v:
8988 case NEON::BI__builtin_neon_vst3_v:
8989 case NEON::BI__builtin_neon_vst3q_v:
8990 case NEON::BI__builtin_neon_vst3_lane_v:
8991 case NEON::BI__builtin_neon_vst3q_lane_v:
8992 case NEON::BI__builtin_neon_vst4_v:
8993 case NEON::BI__builtin_neon_vst4q_v:
8994 case NEON::BI__builtin_neon_vst4_lane_v:
8995 case NEON::BI__builtin_neon_vst4q_lane_v:
8996 // Get the alignment for the argument in addition to the value;
8997 // we'll use it later.
8998 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8999 Ops.push_back(PtrOp0.emitRawPointer(*this));
9000 continue;
9001 }
9002 }
9003 if (i == 1) {
9004 switch (BuiltinID) {
9005 case NEON::BI__builtin_neon_vld2_v:
9006 case NEON::BI__builtin_neon_vld2q_v:
9007 case NEON::BI__builtin_neon_vld3_v:
9008 case NEON::BI__builtin_neon_vld3q_v:
9009 case NEON::BI__builtin_neon_vld4_v:
9010 case NEON::BI__builtin_neon_vld4q_v:
9011 case NEON::BI__builtin_neon_vld2_lane_v:
9012 case NEON::BI__builtin_neon_vld2q_lane_v:
9013 case NEON::BI__builtin_neon_vld3_lane_v:
9014 case NEON::BI__builtin_neon_vld3q_lane_v:
9015 case NEON::BI__builtin_neon_vld4_lane_v:
9016 case NEON::BI__builtin_neon_vld4q_lane_v:
9017 case NEON::BI__builtin_neon_vld2_dup_v:
9018 case NEON::BI__builtin_neon_vld2q_dup_v:
9019 case NEON::BI__builtin_neon_vld3_dup_v:
9020 case NEON::BI__builtin_neon_vld3q_dup_v:
9021 case NEON::BI__builtin_neon_vld4_dup_v:
9022 case NEON::BI__builtin_neon_vld4q_dup_v:
9023 // Get the alignment for the argument in addition to the value;
9024 // we'll use it later.
9025 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9026 Ops.push_back(PtrOp1.emitRawPointer(*this));
9027 continue;
9028 }
9029 }
9030
9031 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9032 }
9033
9034 switch (BuiltinID) {
9035 default: break;
9036
9037 case NEON::BI__builtin_neon_vget_lane_i8:
9038 case NEON::BI__builtin_neon_vget_lane_i16:
9039 case NEON::BI__builtin_neon_vget_lane_i32:
9040 case NEON::BI__builtin_neon_vget_lane_i64:
9041 case NEON::BI__builtin_neon_vget_lane_bf16:
9042 case NEON::BI__builtin_neon_vget_lane_f32:
9043 case NEON::BI__builtin_neon_vgetq_lane_i8:
9044 case NEON::BI__builtin_neon_vgetq_lane_i16:
9045 case NEON::BI__builtin_neon_vgetq_lane_i32:
9046 case NEON::BI__builtin_neon_vgetq_lane_i64:
9047 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9048 case NEON::BI__builtin_neon_vgetq_lane_f32:
9049 case NEON::BI__builtin_neon_vduph_lane_bf16:
9050 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9051 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9052
9053 case NEON::BI__builtin_neon_vrndns_f32: {
9054 Value *Arg = EmitScalarExpr(E->getArg(0));
9055 llvm::Type *Tys[] = {Arg->getType()};
9056 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9057 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9058
9059 case NEON::BI__builtin_neon_vset_lane_i8:
9060 case NEON::BI__builtin_neon_vset_lane_i16:
9061 case NEON::BI__builtin_neon_vset_lane_i32:
9062 case NEON::BI__builtin_neon_vset_lane_i64:
9063 case NEON::BI__builtin_neon_vset_lane_bf16:
9064 case NEON::BI__builtin_neon_vset_lane_f32:
9065 case NEON::BI__builtin_neon_vsetq_lane_i8:
9066 case NEON::BI__builtin_neon_vsetq_lane_i16:
9067 case NEON::BI__builtin_neon_vsetq_lane_i32:
9068 case NEON::BI__builtin_neon_vsetq_lane_i64:
9069 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9070 case NEON::BI__builtin_neon_vsetq_lane_f32:
9071 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9072
9073 case NEON::BI__builtin_neon_vsha1h_u32:
9074 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9075 "vsha1h");
9076 case NEON::BI__builtin_neon_vsha1cq_u32:
9077 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9078 "vsha1h");
9079 case NEON::BI__builtin_neon_vsha1pq_u32:
9080 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9081 "vsha1h");
9082 case NEON::BI__builtin_neon_vsha1mq_u32:
9083 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9084 "vsha1h");
9085
9086 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9087 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9088 "vcvtbfp2bf");
9089 }
9090
9091 // The ARM _MoveToCoprocessor builtins put the input register value as
9092 // the first argument, but the LLVM intrinsic expects it as the third one.
9093 case clang::ARM::BI_MoveToCoprocessor:
9094 case clang::ARM::BI_MoveToCoprocessor2: {
9095 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9096 ? Intrinsic::arm_mcr
9097 : Intrinsic::arm_mcr2);
9098 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9099 Ops[3], Ops[4], Ops[5]});
9100 }
9101 }
9102
9103 // Get the last argument, which specifies the vector type.
9104 assert(HasExtraArg);
9105 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9106 std::optional<llvm::APSInt> Result =
9107      Arg->getIntegerConstantExpr(getContext());
9108  if (!Result)
9109 return nullptr;
9110
9111 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9112 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9113 // Determine the overloaded type of this builtin.
9114 llvm::Type *Ty;
9115 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9116 Ty = FloatTy;
9117 else
9118 Ty = DoubleTy;
9119
9120 // Determine whether this is an unsigned conversion or not.
9121 bool usgn = Result->getZExtValue() == 1;
9122 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9123
9124 // Call the appropriate intrinsic.
9125 Function *F = CGM.getIntrinsic(Int, Ty);
9126 return Builder.CreateCall(F, Ops, "vcvtr");
9127 }
9128
9129 // Determine the type of this overloaded NEON intrinsic.
9130 NeonTypeFlags Type = Result->getZExtValue();
9131 bool usgn = Type.isUnsigned();
9132 bool rightShift = false;
9133
9134 llvm::FixedVectorType *VTy =
9135 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9136 getTarget().hasBFloat16Type());
9137 llvm::Type *Ty = VTy;
9138 if (!Ty)
9139 return nullptr;
9140
9141 // Many NEON builtins have identical semantics and uses in ARM and
9142 // AArch64. Emit these in a single function.
9143 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9144 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9145 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9146 if (Builtin)
9147    return EmitCommonNeonBuiltinExpr(
9148        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9149 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9150
9151 unsigned Int;
9152 switch (BuiltinID) {
9153 default: return nullptr;
9154 case NEON::BI__builtin_neon_vld1q_lane_v:
9155 // Handle 64-bit integer elements as a special case. Use shuffles of
9156 // one-element vectors to avoid poor code for i64 in the backend.
9157 if (VTy->getElementType()->isIntegerTy(64)) {
9158 // Extract the other lane.
9159 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9160 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9161 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9162 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9163 // Load the value as a one-element vector.
9164 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9165 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9166 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9167 Value *Align = getAlignmentValue32(PtrOp0);
9168 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9169 // Combine them.
9170 int Indices[] = {1 - Lane, Lane};
9171 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9172 }
9173 [[fallthrough]];
9174 case NEON::BI__builtin_neon_vld1_lane_v: {
9175 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9176 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9177 Value *Ld = Builder.CreateLoad(PtrOp0);
9178 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9179 }
9180 case NEON::BI__builtin_neon_vqrshrn_n_v:
9181 Int =
9182 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9184 1, true);
9185 case NEON::BI__builtin_neon_vqrshrun_n_v:
9186 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9187 Ops, "vqrshrun_n", 1, true);
9188 case NEON::BI__builtin_neon_vqshrn_n_v:
9189 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9190 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9191 1, true);
9192 case NEON::BI__builtin_neon_vqshrun_n_v:
9193 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9194 Ops, "vqshrun_n", 1, true);
9195 case NEON::BI__builtin_neon_vrecpe_v:
9196 case NEON::BI__builtin_neon_vrecpeq_v:
9197 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9198 Ops, "vrecpe");
9199 case NEON::BI__builtin_neon_vrshrn_n_v:
9200 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9201 Ops, "vrshrn_n", 1, true);
9202 case NEON::BI__builtin_neon_vrsra_n_v:
9203 case NEON::BI__builtin_neon_vrsraq_n_v:
9204 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9205 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9206 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9207 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9208 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9209 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9210 case NEON::BI__builtin_neon_vsri_n_v:
9211 case NEON::BI__builtin_neon_vsriq_n_v:
9212 rightShift = true;
9213 [[fallthrough]];
9214 case NEON::BI__builtin_neon_vsli_n_v:
9215 case NEON::BI__builtin_neon_vsliq_n_v:
9216 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9217 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9218 Ops, "vsli_n");
9219 case NEON::BI__builtin_neon_vsra_n_v:
9220 case NEON::BI__builtin_neon_vsraq_n_v:
9221 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9222 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9223 return Builder.CreateAdd(Ops[0], Ops[1]);
9224 case NEON::BI__builtin_neon_vst1q_lane_v:
9225 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9226 // a one-element vector and avoid poor code for i64 in the backend.
9227 if (VTy->getElementType()->isIntegerTy(64)) {
9228 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9229 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9230 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9231 Ops[2] = getAlignmentValue32(PtrOp0);
9232 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9233 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9234 Tys), Ops);
9235 }
9236 [[fallthrough]];
9237 case NEON::BI__builtin_neon_vst1_lane_v: {
9238 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9239 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9240 return Builder.CreateStore(Ops[1],
9241 PtrOp0.withElementType(Ops[1]->getType()));
9242 }
9243 case NEON::BI__builtin_neon_vtbl1_v:
9244 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9245 Ops, "vtbl1");
9246 case NEON::BI__builtin_neon_vtbl2_v:
9247 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9248 Ops, "vtbl2");
9249 case NEON::BI__builtin_neon_vtbl3_v:
9250 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9251 Ops, "vtbl3");
9252 case NEON::BI__builtin_neon_vtbl4_v:
9253 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9254 Ops, "vtbl4");
9255 case NEON::BI__builtin_neon_vtbx1_v:
9256 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9257 Ops, "vtbx1");
9258 case NEON::BI__builtin_neon_vtbx2_v:
9259 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9260 Ops, "vtbx2");
9261 case NEON::BI__builtin_neon_vtbx3_v:
9262 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9263 Ops, "vtbx3");
9264 case NEON::BI__builtin_neon_vtbx4_v:
9265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9266 Ops, "vtbx4");
9267 }
9268}
9269
9270template<typename Integer>
9271static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9272  return E->getIntegerConstantExpr(Context)->getExtValue();
9273}
9274
9275static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9276 llvm::Type *T, bool Unsigned) {
9277 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9278 // which finds it convenient to specify signed/unsigned as a boolean flag.
9279 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9280}
9281
9282static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9283 uint32_t Shift, bool Unsigned) {
9284 // MVE helper function for integer shift right. This must handle signed vs
9285 // unsigned, and also deal specially with the case where the shift count is
9286 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9287 // undefined behavior, but in MVE it's legal, so we must convert it to code
9288 // that is not undefined in IR.
9289 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9290 ->getElementType()
9291 ->getPrimitiveSizeInBits();
9292 if (Shift == LaneBits) {
9293 // An unsigned shift of the full lane size always generates zero, so we can
9294 // simply emit a zero vector. A signed shift of the full lane size does the
9295 // same thing as shifting by one bit fewer.
9296 if (Unsigned)
9297 return llvm::Constant::getNullValue(V->getType());
9298 else
9299 --Shift;
9300 }
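  // For example, an unsigned shift of a <4 x i32> vector by 32 folds to a
  // zero vector above, while the equivalent signed shift is emitted as an
  // AShr by 31.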
9301 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9302}
9303
9304static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9305 // MVE-specific helper function for a vector splat, which infers the element
9306 // count of the output vector by knowing that MVE vectors are all 128 bits
9307 // wide.
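  // For example, splatting an i16 produces an <8 x i16> result and splatting
  // an i32 produces a <4 x i32> result.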
9308 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9309 return Builder.CreateVectorSplat(Elements, V);
9310}
9311
9312static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9313 CodeGenFunction *CGF,
9314 llvm::Value *V,
9315 llvm::Type *DestType) {
9316 // Convert one MVE vector type into another by reinterpreting its in-register
9317 // format.
9318 //
9319  // On a little-endian target this is identical to a bitcast (which
9320  // reinterprets the memory format). On a big-endian target they're not
9321  // necessarily the same, because the register and memory formats map to
9322  // each other differently depending on the lane size.
9323 //
9324 // We generate a bitcast whenever we can (if we're little-endian, or if the
9325 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9326 // that performs the different kind of reinterpretation.
9327 if (CGF->getTarget().isBigEndian() &&
9328 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9329 return Builder.CreateCall(
9330 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9331 {DestType, V->getType()}),
9332 V);
9333 } else {
9334 return Builder.CreateBitCast(V, DestType);
9335 }
9336}
9337
9338static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9339 // Make a shufflevector that extracts every other element of a vector (evens
9340 // or odds, as desired).
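  // For an <8 x i16> input, for example, the mask is <0,2,4,6> for the even
  // lanes or <1,3,5,7> for the odd lanes.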
9341 SmallVector<int, 16> Indices;
9342 unsigned InputElements =
9343 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9344 for (unsigned i = 0; i < InputElements; i += 2)
9345 Indices.push_back(i + Odd);
9346 return Builder.CreateShuffleVector(V, Indices);
9347}
9348
9349static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9350 llvm::Value *V1) {
9351 // Make a shufflevector that interleaves two vectors element by element.
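  // For two <4 x i32> inputs, for example, the mask is <0,4,1,5,2,6,3,7>.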
9352 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9353 SmallVector<int, 16> Indices;
9354 unsigned InputElements =
9355 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9356 for (unsigned i = 0; i < InputElements; i++) {
9357 Indices.push_back(i);
9358 Indices.push_back(i + InputElements);
9359 }
9360 return Builder.CreateShuffleVector(V0, V1, Indices);
9361}
9362
9363template<unsigned HighBit, unsigned OtherBits>
9364static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9365 // MVE-specific helper function to make a vector splat of a constant such as
9366 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
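  // With 32-bit lanes, for example, <HighBit=1, OtherBits=0> splats
  // 0x80000000 (INT_MIN), <1,1> splats 0xffffffff (UINT_MAX) and <0,1> splats
  // 0x7fffffff (INT_MAX).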
9367 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9368 unsigned LaneBits = T->getPrimitiveSizeInBits();
9369 uint32_t Value = HighBit << (LaneBits - 1);
9370 if (OtherBits)
9371 Value |= (1UL << (LaneBits - 1)) - 1;
9372 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9373 return ARMMVEVectorSplat(Builder, Lane);
9374}
9375
9376static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9377 llvm::Value *V,
9378 unsigned ReverseWidth) {
9379 // MVE-specific helper function which reverses the elements of a
9380 // vector within every (ReverseWidth)-bit collection of lanes.
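  // With 8-bit lanes and ReverseWidth == 32, for example, Mask is 3 and the
  // shuffle mask becomes <3,2,1,0,7,6,5,4,...>, reversing each group of four
  // bytes.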
9381 SmallVector<int, 16> Indices;
9382 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9383 unsigned Elements = 128 / LaneSize;
9384 unsigned Mask = ReverseWidth / LaneSize - 1;
9385 for (unsigned i = 0; i < Elements; i++)
9386 Indices.push_back(i ^ Mask);
9387 return Builder.CreateShuffleVector(V, Indices);
9388}
9389
9390Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9391                                              const CallExpr *E,
9392 ReturnValueSlot ReturnValue,
9393 llvm::Triple::ArchType Arch) {
9394 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9395 Intrinsic::ID IRIntr;
9396 unsigned NumVectors;
9397
9398 // Code autogenerated by Tablegen will handle all the simple builtins.
9399 switch (BuiltinID) {
9400 #include "clang/Basic/arm_mve_builtin_cg.inc"
9401
9402 // If we didn't match an MVE builtin id at all, go back to the
9403 // main EmitARMBuiltinExpr.
9404 default:
9405 return nullptr;
9406 }
9407
9408 // Anything that breaks from that switch is an MVE builtin that
9409 // needs handwritten code to generate.
9410
9411 switch (CustomCodeGenType) {
9412
9413 case CustomCodeGen::VLD24: {
9414    llvm::SmallVector<Value *, 4> Ops;
9415    llvm::SmallVector<llvm::Type *, 4> Tys;
9416
9417 auto MvecCType = E->getType();
9418 auto MvecLType = ConvertType(MvecCType);
9419 assert(MvecLType->isStructTy() &&
9420 "Return type for vld[24]q should be a struct");
9421 assert(MvecLType->getStructNumElements() == 1 &&
9422 "Return-type struct for vld[24]q should have one element");
9423 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9424 assert(MvecLTypeInner->isArrayTy() &&
9425 "Return-type struct for vld[24]q should contain an array");
9426 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9427 "Array member of return-type struct vld[24]q has wrong length");
9428 auto VecLType = MvecLTypeInner->getArrayElementType();
9429
9430 Tys.push_back(VecLType);
9431
9432 auto Addr = E->getArg(0);
9433 Ops.push_back(EmitScalarExpr(Addr));
9434 Tys.push_back(ConvertType(Addr->getType()));
9435
9436 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9437 Value *LoadResult = Builder.CreateCall(F, Ops);
9438 Value *MvecOut = PoisonValue::get(MvecLType);
9439 for (unsigned i = 0; i < NumVectors; ++i) {
9440 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9441 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9442 }
9443
9444 if (ReturnValue.isNull())
9445 return MvecOut;
9446 else
9447 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9448 }
9449
9450 case CustomCodeGen::VST24: {
9451    llvm::SmallVector<Value *, 4> Ops;
9452    llvm::SmallVector<llvm::Type *, 4> Tys;
9453
9454 auto Addr = E->getArg(0);
9455 Ops.push_back(EmitScalarExpr(Addr));
9456 Tys.push_back(ConvertType(Addr->getType()));
9457
9458 auto MvecCType = E->getArg(1)->getType();
9459 auto MvecLType = ConvertType(MvecCType);
9460 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9461 assert(MvecLType->getStructNumElements() == 1 &&
9462 "Data-type struct for vst2q should have one element");
9463 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9464 assert(MvecLTypeInner->isArrayTy() &&
9465 "Data-type struct for vst2q should contain an array");
9466 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9467           "Array member of data-type struct vst2q has wrong length");
9468 auto VecLType = MvecLTypeInner->getArrayElementType();
9469
9470 Tys.push_back(VecLType);
9471
9472 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9473 EmitAggExpr(E->getArg(1), MvecSlot);
9474 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9475 for (unsigned i = 0; i < NumVectors; i++)
9476 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9477
9478 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9479 Value *ToReturn = nullptr;
9480 for (unsigned i = 0; i < NumVectors; i++) {
9481 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9482 ToReturn = Builder.CreateCall(F, Ops);
9483 Ops.pop_back();
9484 }
9485 return ToReturn;
9486 }
9487 }
9488 llvm_unreachable("unknown custom codegen type.");
9489}
9490
9491Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9492                                              const CallExpr *E,
9493 ReturnValueSlot ReturnValue,
9494 llvm::Triple::ArchType Arch) {
9495 switch (BuiltinID) {
9496 default:
9497 return nullptr;
9498#include "clang/Basic/arm_cde_builtin_cg.inc"
9499 }
9500}
9501
9502static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9503 const CallExpr *E,
9504                                        SmallVectorImpl<Value *> &Ops,
9505                                        llvm::Triple::ArchType Arch) {
9506 unsigned int Int = 0;
9507 const char *s = nullptr;
9508
9509 switch (BuiltinID) {
9510 default:
9511 return nullptr;
9512 case NEON::BI__builtin_neon_vtbl1_v:
9513 case NEON::BI__builtin_neon_vqtbl1_v:
9514 case NEON::BI__builtin_neon_vqtbl1q_v:
9515 case NEON::BI__builtin_neon_vtbl2_v:
9516 case NEON::BI__builtin_neon_vqtbl2_v:
9517 case NEON::BI__builtin_neon_vqtbl2q_v:
9518 case NEON::BI__builtin_neon_vtbl3_v:
9519 case NEON::BI__builtin_neon_vqtbl3_v:
9520 case NEON::BI__builtin_neon_vqtbl3q_v:
9521 case NEON::BI__builtin_neon_vtbl4_v:
9522 case NEON::BI__builtin_neon_vqtbl4_v:
9523 case NEON::BI__builtin_neon_vqtbl4q_v:
9524 break;
9525 case NEON::BI__builtin_neon_vtbx1_v:
9526 case NEON::BI__builtin_neon_vqtbx1_v:
9527 case NEON::BI__builtin_neon_vqtbx1q_v:
9528 case NEON::BI__builtin_neon_vtbx2_v:
9529 case NEON::BI__builtin_neon_vqtbx2_v:
9530 case NEON::BI__builtin_neon_vqtbx2q_v:
9531 case NEON::BI__builtin_neon_vtbx3_v:
9532 case NEON::BI__builtin_neon_vqtbx3_v:
9533 case NEON::BI__builtin_neon_vqtbx3q_v:
9534 case NEON::BI__builtin_neon_vtbx4_v:
9535 case NEON::BI__builtin_neon_vqtbx4_v:
9536 case NEON::BI__builtin_neon_vqtbx4q_v:
9537 break;
9538 }
9539
9540 assert(E->getNumArgs() >= 3);
9541
9542 // Get the last argument, which specifies the vector type.
9543 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9544 std::optional<llvm::APSInt> Result =
9545      Arg->getIntegerConstantExpr(CGF.getContext());
9546  if (!Result)
9547 return nullptr;
9548
9549 // Determine the type of this overloaded NEON intrinsic.
9550 NeonTypeFlags Type = Result->getZExtValue();
9551 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9552 if (!Ty)
9553 return nullptr;
9554
9555 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9556
9557  // AArch64 scalar builtins are not overloaded; they do not have an extra
9558  // argument that specifies the vector type, so we need to handle each case.
9559 switch (BuiltinID) {
9560 case NEON::BI__builtin_neon_vtbl1_v: {
9561 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9562 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9563 }
9564 case NEON::BI__builtin_neon_vtbl2_v: {
9565 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9566 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9567 }
9568 case NEON::BI__builtin_neon_vtbl3_v: {
9569 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9570 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9571 }
9572 case NEON::BI__builtin_neon_vtbl4_v: {
9573 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9574 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9575 }
9576 case NEON::BI__builtin_neon_vtbx1_v: {
9577 Value *TblRes =
9578 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9579 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9580
9581 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9582 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9583 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9584
9585 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9586 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9587 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9588 }
9589 case NEON::BI__builtin_neon_vtbx2_v: {
9590 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9591 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9592 }
9593 case NEON::BI__builtin_neon_vtbx3_v: {
9594 Value *TblRes =
9595 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9596 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9597
9598 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9599 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9600 TwentyFourV);
9601 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9602
9603 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9604 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9605 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9606 }
9607 case NEON::BI__builtin_neon_vtbx4_v: {
9608 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9609 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9610 }
9611 case NEON::BI__builtin_neon_vqtbl1_v:
9612 case NEON::BI__builtin_neon_vqtbl1q_v:
9613 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9614 case NEON::BI__builtin_neon_vqtbl2_v:
9615 case NEON::BI__builtin_neon_vqtbl2q_v: {
9616 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9617 case NEON::BI__builtin_neon_vqtbl3_v:
9618 case NEON::BI__builtin_neon_vqtbl3q_v:
9619 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9620 case NEON::BI__builtin_neon_vqtbl4_v:
9621 case NEON::BI__builtin_neon_vqtbl4q_v:
9622 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9623 case NEON::BI__builtin_neon_vqtbx1_v:
9624 case NEON::BI__builtin_neon_vqtbx1q_v:
9625 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9626 case NEON::BI__builtin_neon_vqtbx2_v:
9627 case NEON::BI__builtin_neon_vqtbx2q_v:
9628 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9629 case NEON::BI__builtin_neon_vqtbx3_v:
9630 case NEON::BI__builtin_neon_vqtbx3q_v:
9631 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9632 case NEON::BI__builtin_neon_vqtbx4_v:
9633 case NEON::BI__builtin_neon_vqtbx4q_v:
9634 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9635 }
9636 }
9637
9638 if (!Int)
9639 return nullptr;
9640
9641 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9642 return CGF.EmitNeonCall(F, Ops, s);
9643}
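// A per-lane scalar model of the vtbx1 compare/select sequence emitted in the
// vtbx1 case above; this is only an illustrative sketch, not code used by the
// code generator. Indices past the end of the one-register (8-byte) table keep
// the destination lane, while in-range indices take the tbl1 result.
static unsigned char vtbx1LaneModel(unsigned char Dst, unsigned char TblRes,
                                    unsigned char Idx) {
  return Idx >= 8 ? Dst : TblRes;
}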
9644
9645Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9646 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9647 Op = Builder.CreateBitCast(Op, Int16Ty);
9648 Value *V = PoisonValue::get(VTy);
9649 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9650 Op = Builder.CreateInsertElement(V, Op, CI);
9651 return Op;
9652}
9653
9654/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9655/// access builtin. Only required if it can't be inferred from the base pointer
9656/// operand.
9657llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9658 switch (TypeFlags.getMemEltType()) {
9659 case SVETypeFlags::MemEltTyDefault:
9660 return getEltType(TypeFlags);
9661 case SVETypeFlags::MemEltTyInt8:
9662 return Builder.getInt8Ty();
9663 case SVETypeFlags::MemEltTyInt16:
9664 return Builder.getInt16Ty();
9665 case SVETypeFlags::MemEltTyInt32:
9666 return Builder.getInt32Ty();
9667 case SVETypeFlags::MemEltTyInt64:
9668 return Builder.getInt64Ty();
9669 }
9670 llvm_unreachable("Unknown MemEltType");
9671}
9672
9673llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9674 switch (TypeFlags.getEltType()) {
9675 default:
9676 llvm_unreachable("Invalid SVETypeFlag!");
9677
9678 case SVETypeFlags::EltTyInt8:
9679 return Builder.getInt8Ty();
9680 case SVETypeFlags::EltTyInt16:
9681 return Builder.getInt16Ty();
9682 case SVETypeFlags::EltTyInt32:
9683 return Builder.getInt32Ty();
9684 case SVETypeFlags::EltTyInt64:
9685 return Builder.getInt64Ty();
9686 case SVETypeFlags::EltTyInt128:
9687 return Builder.getInt128Ty();
9688
9689 case SVETypeFlags::EltTyFloat16:
9690 return Builder.getHalfTy();
9691 case SVETypeFlags::EltTyFloat32:
9692 return Builder.getFloatTy();
9693 case SVETypeFlags::EltTyFloat64:
9694 return Builder.getDoubleTy();
9695
9696 case SVETypeFlags::EltTyBFloat16:
9697 return Builder.getBFloatTy();
9698
9699 case SVETypeFlags::EltTyBool8:
9700 case SVETypeFlags::EltTyBool16:
9701 case SVETypeFlags::EltTyBool32:
9702 case SVETypeFlags::EltTyBool64:
9703 return Builder.getInt1Ty();
9704 }
9705}
9706
9707// Return the llvm predicate vector type corresponding to the specified element
9708// TypeFlags.
9709 llvm::ScalableVectorType *
9710CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9711 switch (TypeFlags.getEltType()) {
9712 default: llvm_unreachable("Unhandled SVETypeFlag!");
9713
9714 case SVETypeFlags::EltTyInt8:
9715 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9716 case SVETypeFlags::EltTyInt16:
9717 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9718 case SVETypeFlags::EltTyInt32:
9719 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9720 case SVETypeFlags::EltTyInt64:
9721 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9722
9723 case SVETypeFlags::EltTyBFloat16:
9724 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9725 case SVETypeFlags::EltTyFloat16:
9726 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9727 case SVETypeFlags::EltTyFloat32:
9728 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9729 case SVETypeFlags::EltTyFloat64:
9730 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9731
9732 case SVETypeFlags::EltTyBool8:
9733 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9734 case SVETypeFlags::EltTyBool16:
9735 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9736 case SVETypeFlags::EltTyBool32:
9737 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9738 case SVETypeFlags::EltTyBool64:
9739 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9740 }
9741}
9742
9743// Return the llvm vector type corresponding to the specified element TypeFlags.
9744llvm::ScalableVectorType *
9745CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9746 switch (TypeFlags.getEltType()) {
9747 default:
9748 llvm_unreachable("Invalid SVETypeFlag!");
9749
9750 case SVETypeFlags::EltTyInt8:
9751 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9752 case SVETypeFlags::EltTyInt16:
9753 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9754 case SVETypeFlags::EltTyInt32:
9755 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9756 case SVETypeFlags::EltTyInt64:
9757 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9758
9759 case SVETypeFlags::EltTyFloat16:
9760 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9761 case SVETypeFlags::EltTyBFloat16:
9762 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9763 case SVETypeFlags::EltTyFloat32:
9764 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9765 case SVETypeFlags::EltTyFloat64:
9766 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9767
9768 case SVETypeFlags::EltTyBool8:
9769 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9770 case SVETypeFlags::EltTyBool16:
9771 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9772 case SVETypeFlags::EltTyBool32:
9773 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9774 case SVETypeFlags::EltTyBool64:
9775 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9776 }
9777}
9778
9779 llvm::Value *
9780CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9781 Function *Ptrue =
9782 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9783 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9784}
9785
9786constexpr unsigned SVEBitsPerBlock = 128;
9787
9788static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9789 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9790 return llvm::ScalableVectorType::get(EltTy, NumElts);
9791}
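// Worked example of the mapping above, assuming the minimum 128-bit SVE
// register: an i8 element gives 128/8 = 16 lanes (<vscale x 16 x i8>), an i32
// element gives 4 lanes (<vscale x 4 x i32>), and i64/f64 elements give 2.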
9792
9793// Reinterpret the input predicate so that it can be used to correctly isolate
9794 // the elements of the specified datatype.
9795Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9796 llvm::ScalableVectorType *VTy) {
9797
9798 if (isa<TargetExtType>(Pred->getType()) &&
9799 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9800 return Pred;
9801
9802 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9803 if (Pred->getType() == RTy)
9804 return Pred;
9805
9806 unsigned IntID;
9807 llvm::Type *IntrinsicTy;
9808 switch (VTy->getMinNumElements()) {
9809 default:
9810 llvm_unreachable("unsupported element count!");
9811 case 1:
9812 case 2:
9813 case 4:
9814 case 8:
9815 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9816 IntrinsicTy = RTy;
9817 break;
9818 case 16:
9819 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9820 IntrinsicTy = Pred->getType();
9821 break;
9822 }
9823
9824 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9825 Value *C = Builder.CreateCall(F, Pred);
9826 assert(C->getType() == RTy && "Unexpected return type!");
9827 return C;
9828}
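// Worked example: casting an svbool_t predicate for use with 64-bit data emits
//   %p = call <vscale x 2 x i1>
//       @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// while the opposite direction (back to svbool_t) goes through
// convert.to.svbool.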
9829
9830Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9831 SmallVectorImpl<Value *> &Ops,
9832 unsigned IntID) {
9833 auto *ResultTy = getSVEType(TypeFlags);
9834 auto *OverloadedTy =
9835 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9836
9837 Function *F = nullptr;
9838 if (Ops[1]->getType()->isVectorTy())
9839 // This is the "vector base, scalar offset" case. In order to uniquely
9840 // map this built-in to an LLVM IR intrinsic, we need both the return type
9841 // and the type of the vector base.
9842 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9843 else
9844 // This is the "scalar base, vector offset" case. The type of the offset
9845 // is encoded in the name of the intrinsic. We only need to specify the
9846 // return type in order to uniquely map this built-in to an LLVM IR
9847 // intrinsic.
9848 F = CGM.getIntrinsic(IntID, OverloadedTy);
9849
9850 // At the ACLE level there's only one predicate type, svbool_t, which is
9851 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9852 // actual type being loaded. For example, when loading doubles (64-bit) the
9853 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9854 // the predicate and the data being loaded must match. Cast to the type
9855 // expected by the intrinsic. The intrinsic itself should be defined in
9856 // a way that enforces relations between parameter types.
9857 Ops[0] = EmitSVEPredicateCast(
9858 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9859
9860 // Pass 0 when the offset is missing. This can only be applied when using
9861 // the "vector base" addressing mode for which ACLE allows no offset. The
9862 // corresponding LLVM IR always requires an offset.
9863 if (Ops.size() == 2) {
9864 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9865 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9866 }
9867
9868 // For "vector base, scalar index" scale the index so that it becomes a
9869 // scalar offset.
9870 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9871 unsigned BytesPerElt =
9872 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9873 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9874 }
9875
9876 Value *Call = Builder.CreateCall(F, Ops);
9877
9878 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9879 // other cases it's folded into a nop.
9880 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9881 : Builder.CreateSExt(Call, ResultTy);
9882}
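// An illustrative sketch (not used by the code generator) of the
// index-to-offset scaling above: shifting by Log2_32(BytesPerElt) is a
// multiply by the element size, e.g. index 5 into 64-bit elements becomes
// byte offset 40.
static unsigned long long sveIndexToByteOffsetModel(unsigned long long Index,
                                                    unsigned BytesPerElt) {
  return Index * BytesPerElt;
}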
9883
9884Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9885 SmallVectorImpl<Value *> &Ops,
9886 unsigned IntID) {
9887 auto *SrcDataTy = getSVEType(TypeFlags);
9888 auto *OverloadedTy =
9889 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9890
9891 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9892 // it's the first argument. Move it accordingly.
9893 Ops.insert(Ops.begin(), Ops.pop_back_val());
9894
9895 Function *F = nullptr;
9896 if (Ops[2]->getType()->isVectorTy())
9897 // This is the "vector base, scalar offset" case. In order to uniquely
9898 // map this built-in to an LLVM IR intrinsic, we need both the return type
9899 // and the type of the vector base.
9900 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9901 else
9902 // This is the "scalar base, vector offset" case. The type of the offset
9903 // is encoded in the name of the intrinsic. We only need to specify the
9904 // return type in order to uniquely map this built-in to an LLVM IR
9905 // intrinsic.
9906 F = CGM.getIntrinsic(IntID, OverloadedTy);
9907
9908 // Pass 0 when the offset is missing. This can only be applied when using
9909 // the "vector base" addressing mode for which ACLE allows no offset. The
9910 // corresponding LLVM IR always requires an offset.
9911 if (Ops.size() == 3) {
9912 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9913 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9914 }
9915
9916 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9917 // folded into a nop.
9918 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9919
9920 // At the ACLE level there's only one predicate type, svbool_t, which is
9921 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9922 // actual type being stored. For example, when storing doubles (64-bit) the
9923 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9924 // the predicate and the data being stored must match. Cast to the type
9925 // expected by the intrinsic. The intrinsic itself should be defined in
9926 // a way that enforces relations between parameter types.
9927 Ops[1] = EmitSVEPredicateCast(
9928 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9929
9930 // For "vector base, scalar index" scale the index so that it becomes a
9931 // scalar offset.
9932 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9933 unsigned BytesPerElt =
9934 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9935 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9936 }
9937
9938 return Builder.CreateCall(F, Ops);
9939}
9940
9941Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9942 SmallVectorImpl<Value *> &Ops,
9943 unsigned IntID) {
9944 // The gather prefetches are overloaded on the vector input - this can either
9945 // be the vector of base addresses or vector of offsets.
9946 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9947 if (!OverloadedTy)
9948 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9949
9950 // Cast the predicate from svbool_t to the right number of elements.
9951 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9952
9953 // vector + imm addressing modes
9954 if (Ops[1]->getType()->isVectorTy()) {
9955 if (Ops.size() == 3) {
9956 // Pass 0 for 'vector+imm' when the index is omitted.
9957 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9958
9959 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9960 std::swap(Ops[2], Ops[3]);
9961 } else {
9962 // Index needs to be passed as scaled offset.
9963 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9964 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9965 if (BytesPerElt > 1)
9966 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9967 }
9968 }
9969
9970 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9971 return Builder.CreateCall(F, Ops);
9972}
9973
9974Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9975 SmallVectorImpl<Value *> &Ops,
9976 unsigned IntID) {
9977 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9978
9979 unsigned N;
9980 switch (IntID) {
9981 case Intrinsic::aarch64_sve_ld2_sret:
9982 case Intrinsic::aarch64_sve_ld1_pn_x2:
9983 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9984 case Intrinsic::aarch64_sve_ld2q_sret:
9985 N = 2;
9986 break;
9987 case Intrinsic::aarch64_sve_ld3_sret:
9988 case Intrinsic::aarch64_sve_ld3q_sret:
9989 N = 3;
9990 break;
9991 case Intrinsic::aarch64_sve_ld4_sret:
9992 case Intrinsic::aarch64_sve_ld1_pn_x4:
9993 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9994 case Intrinsic::aarch64_sve_ld4q_sret:
9995 N = 4;
9996 break;
9997 default:
9998 llvm_unreachable("unknown intrinsic!");
9999 }
10000 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10001 VTy->getElementCount() * N);
10002
10003 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10004 Value *BasePtr = Ops[1];
10005
10006 // Does the load have an offset?
10007 if (Ops.size() > 2)
10008 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10009
10010 Function *F = CGM.getIntrinsic(IntID, {VTy});
10011 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10012 unsigned MinElts = VTy->getMinNumElements();
10013 Value *Ret = llvm::PoisonValue::get(RetTy);
10014 for (unsigned I = 0; I < N; I++) {
10015 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10016 Value *SRet = Builder.CreateExtractValue(Call, I);
10017 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10018 }
10019 return Ret;
10020}
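// Worked example: an svld2 of 32-bit elements returns the struct
// {<vscale x 4 x i32>, <vscale x 4 x i32>}; the loop above concatenates the
// two parts into one <vscale x 8 x i32> via llvm.vector.insert at element
// offsets 0 and 4.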
10021
10022Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10023 SmallVectorImpl<Value *> &Ops,
10024 unsigned IntID) {
10025 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10026
10027 unsigned N;
10028 switch (IntID) {
10029 case Intrinsic::aarch64_sve_st2:
10030 case Intrinsic::aarch64_sve_st1_pn_x2:
10031 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10032 case Intrinsic::aarch64_sve_st2q:
10033 N = 2;
10034 break;
10035 case Intrinsic::aarch64_sve_st3:
10036 case Intrinsic::aarch64_sve_st3q:
10037 N = 3;
10038 break;
10039 case Intrinsic::aarch64_sve_st4:
10040 case Intrinsic::aarch64_sve_st1_pn_x4:
10041 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10042 case Intrinsic::aarch64_sve_st4q:
10043 N = 4;
10044 break;
10045 default:
10046 llvm_unreachable("unknown intrinsic!");
10047 }
10048
10049 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10050 Value *BasePtr = Ops[1];
10051
10052 // Does the store have an offset?
10053 if (Ops.size() > (2 + N))
10054 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10055
10056 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10057 // need to break up the tuple vector.
10058 SmallVector<Value *> Operands;
10059 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10060 Operands.push_back(Ops[I]);
10061 Operands.append({Predicate, BasePtr});
10062 Function *F = CGM.getIntrinsic(IntID, { VTy });
10063
10064 return Builder.CreateCall(F, Operands);
10065}
10066
10067// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10068// svpmullt_pair intrinsics, with the exception that their results are bitcast
10069 // to a wider type.
10070Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10071 SmallVectorImpl<Value *> &Ops,
10072 unsigned BuiltinID) {
10073 // Splat scalar operand to vector (intrinsics with _n infix)
10074 if (TypeFlags.hasSplatOperand()) {
10075 unsigned OpNo = TypeFlags.getSplatOperand();
10076 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10077 }
10078
10079 // The pair-wise function has a narrower overloaded type.
10080 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10081 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10082
10083 // Now bitcast to the wider result type.
10084 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10085 return EmitSVEReinterpret(Call, Ty);
10086}
10087
10088Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10089 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10090 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10091 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10092 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10093}
10094
10095Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10096 SmallVectorImpl<Value *> &Ops,
10097 unsigned BuiltinID) {
10098 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10099 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10100 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10101
10102 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10103 Value *BasePtr = Ops[1];
10104
10105 // Fold the index operand into the base pointer when it is supplied.
10106 if (Ops.size() > 3)
10107 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10108
10109 Value *PrfOp = Ops.back();
10110
10111 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10112 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10113}
10114
10115Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10116 llvm::Type *ReturnTy,
10117 SmallVectorImpl<Value *> &Ops,
10118 unsigned IntrinsicID,
10119 bool IsZExtReturn) {
10120 QualType LangPTy = E->getArg(1)->getType();
10121 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10122 LangPTy->castAs<PointerType>()->getPointeeType());
10123
10124 // The vector type that is returned may be different from the
10125 // eventual type loaded from memory.
10126 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10127 llvm::ScalableVectorType *MemoryTy = nullptr;
10128 llvm::ScalableVectorType *PredTy = nullptr;
10129 bool IsQuadLoad = false;
10130 switch (IntrinsicID) {
10131 case Intrinsic::aarch64_sve_ld1uwq:
10132 case Intrinsic::aarch64_sve_ld1udq:
10133 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10134 PredTy = llvm::ScalableVectorType::get(
10135 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10136 IsQuadLoad = true;
10137 break;
10138 default:
10139 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10140 PredTy = MemoryTy;
10141 break;
10142 }
10143
10144 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10145 Value *BasePtr = Ops[1];
10146
10147 // Does the load have an offset?
10148 if (Ops.size() > 2)
10149 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10150
10151 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10152 auto *Load =
10153 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10154 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10155 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10156
10157 if (IsQuadLoad)
10158 return Load;
10159
10160 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10161 : Builder.CreateSExt(Load, VectorTy);
10162}
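// Worked example, assuming a zero-extending form such as svld1ub_u32 whose
// builtin maps to the sve ld1 intrinsic: the memory type is
// <vscale x 4 x i8>, so the emitted sequence is roughly
//   %v = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(
//            <vscale x 4 x i1> %pg, ptr %base)
//   %r = zext <vscale x 4 x i8> %v to <vscale x 4 x i32>
// (intrinsic name shown for illustration; the actual IntrinsicID comes from
// the builtin's mapping).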
10163
10164Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10165 SmallVectorImpl<Value *> &Ops,
10166 unsigned IntrinsicID) {
10167 QualType LangPTy = E->getArg(1)->getType();
10168 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10169 LangPTy->castAs<PointerType>()->getPointeeType());
10170
10171 // The vector type that is stored may be different from the
10172 // eventual type stored to memory.
10173 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10174 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10175
10176 auto PredTy = MemoryTy;
10177 auto AddrMemoryTy = MemoryTy;
10178 bool IsQuadStore = false;
10179
10180 switch (IntrinsicID) {
10181 case Intrinsic::aarch64_sve_st1wq:
10182 case Intrinsic::aarch64_sve_st1dq:
10183 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10184 PredTy =
10185 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10186 IsQuadStore = true;
10187 break;
10188 default:
10189 break;
10190 }
10191 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10192 Value *BasePtr = Ops[1];
10193
10194 // Does the store have an offset?
10195 if (Ops.size() == 4)
10196 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10197
10198 // Last value is always the data
10199 Value *Val =
10200 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10201
10202 Function *F =
10203 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10204 auto *Store =
10205 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10206 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10207 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10208 return Store;
10209}
10210
10211Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10212 SmallVectorImpl<Value *> &Ops,
10213 unsigned IntID) {
10214 Ops[2] = EmitSVEPredicateCast(
10215 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10216
10217 SmallVector<Value *> NewOps;
10218 NewOps.push_back(Ops[2]);
10219
10220 llvm::Value *BasePtr = Ops[3];
10221
10222 // If the intrinsic contains the vnum parameter, multiply it by the vector
10223 // size in bytes.
10224 if (Ops.size() == 5) {
10225 Function *StreamingVectorLength =
10226 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10227 llvm::Value *StreamingVectorLengthCall =
10228 Builder.CreateCall(StreamingVectorLength);
10229 llvm::Value *Mulvl =
10230 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10231 // The type of the ptr parameter is void *, so use Int8Ty here.
10232 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10233 }
10234 NewOps.push_back(BasePtr);
10235 NewOps.push_back(Ops[0]);
10236 NewOps.push_back(Ops[1]);
10237 Function *F = CGM.getIntrinsic(IntID);
10238 return Builder.CreateCall(F, NewOps);
10239}
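// An illustrative sketch (not used by the code generator) of the vnum
// addressing computed above: the pointer advances by whole streaming vectors,
// i.e. vnum * svcntsb() bytes from the base.
static const char *smeVnumAddressModel(const char *Base, long Vnum,
                                       unsigned long SVLBytes) {
  return Base + Vnum * (long)SVLBytes;
}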
10240
10241Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10242 SmallVectorImpl<Value *> &Ops,
10243 unsigned IntID) {
10244 auto *VecTy = getSVEType(TypeFlags);
10245 Function *F = CGM.getIntrinsic(IntID, VecTy);
10246 if (TypeFlags.isReadZA())
10247 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10248 else if (TypeFlags.isWriteZA())
10249 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10250 return Builder.CreateCall(F, Ops);
10251}
10252
10253Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10254 SmallVectorImpl<Value *> &Ops,
10255 unsigned IntID) {
10256 // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
10257 if (Ops.size() == 0)
10258 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10259 Function *F = CGM.getIntrinsic(IntID, {});
10260 return Builder.CreateCall(F, Ops);
10261}
10262
10263Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10264 SmallVectorImpl<Value *> &Ops,
10265 unsigned IntID) {
10266 if (Ops.size() == 2)
10267 Ops.push_back(Builder.getInt32(0));
10268 else
10269 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10270 Function *F = CGM.getIntrinsic(IntID, {});
10271 return Builder.CreateCall(F, Ops);
10272}
10273
10274// Limit the usage of scalable llvm IR generated by the ACLE by using the
10275// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10276Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10277 return Builder.CreateVectorSplat(
10278 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10279}
10280
10281Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10282 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10283}
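// Worked example: for an "_n" builtin such as svadd_n_s32_m, the scalar
// operand is splatted with EmitSVEDupX to <vscale x 4 x i32> before the
// intrinsic call, so the overloaded intrinsic only ever sees full vector
// operands.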
10284
10285Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10286 // FIXME: For big endian this needs an additional REV, or needs a separate
10287 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10288 // instruction is defined as 'bitwise' equivalent from memory point of
10289 // view (when storing/reloading), whereas the svreinterpret builtin
10290 // implements bitwise equivalent cast from register point of view.
10291 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10292 return Builder.CreateBitCast(Val, Ty);
10293}
10294
10295static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10296 SmallVectorImpl<Value *> &Ops) {
10297 auto *SplatZero = Constant::getNullValue(Ty);
10298 Ops.insert(Ops.begin(), SplatZero);
10299}
10300
10301static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10302 SmallVectorImpl<Value *> &Ops) {
10303 auto *SplatUndef = UndefValue::get(Ty);
10304 Ops.insert(Ops.begin(), SplatUndef);
10305}
10306
10307SmallVector<llvm::Type *, 2>
10308CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10309 llvm::Type *ResultType,
10310 ArrayRef<Value *> Ops) {
10311 if (TypeFlags.isOverloadNone())
10312 return {};
10313
10314 llvm::Type *DefaultType = getSVEType(TypeFlags);
10315
10316 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10317 return {DefaultType, Ops[1]->getType()};
10318
10319 if (TypeFlags.isOverloadWhileRW())
10320 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10321
10322 if (TypeFlags.isOverloadCvt())
10323 return {Ops[0]->getType(), Ops.back()->getType()};
10324
10325 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10326 ResultType->isVectorTy())
10327 return {ResultType, Ops[1]->getType()};
10328
10329 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10330 return {DefaultType};
10331}
10332
10333Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10334 llvm::Type *Ty,
10335 ArrayRef<Value *> Ops) {
10336 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10337 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10338
10339 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10340 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10341 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10342
10343 if (!SingleVecTy)
10344 return nullptr;
10345
10346 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10347 I * SingleVecTy->getMinNumElements());
10348
10349 if (TypeFlags.isTupleSet())
10350 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10351 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10352}
10353
10354Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10355 llvm::Type *Ty,
10356 ArrayRef<Value *> Ops) {
10357 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10358
10359 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10360
10361 if (!SrcTy)
10362 return nullptr;
10363
10364 unsigned MinElts = SrcTy->getMinNumElements();
10365 Value *Call = llvm::PoisonValue::get(Ty);
10366 for (unsigned I = 0; I < Ops.size(); I++) {
10367 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10368 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10369 }
10370
10371 return Call;
10372}
10373
10374Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10375 // Multi-vector results are returned as a struct of parts; combine them into
10376 // a single (wide) result vector.
10377 auto *StructTy = dyn_cast<StructType>(Call->getType());
10378 if (!StructTy)
10379 return Call;
10380
10381 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10382 if (!VTy)
10383 return Call;
10384 unsigned N = StructTy->getNumElements();
10385
10386 // We may need to emit a cast to a svbool_t
10387 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10388 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10389
10390 ScalableVectorType *WideVTy =
10391 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10392 Value *Ret = llvm::PoisonValue::get(WideVTy);
10393 for (unsigned I = 0; I < N; ++I) {
10394 Value *SRet = Builder.CreateExtractValue(Call, I);
10395 assert(SRet->getType() == VTy && "Unexpected type for result value");
10396 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10397
10398 if (IsPredTy)
10399 SRet = EmitSVEPredicateCast(
10400 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10401
10402 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10403 }
10404 Call = Ret;
10405
10406 return Call;
10407}
10408
10409void CodeGenFunction::GetAArch64SVEProcessedOperands(
10410 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10411 SVETypeFlags TypeFlags) {
10412 // Find out if any arguments are required to be integer constant expressions.
10413 unsigned ICEArguments = 0;
10414 ASTContext::GetBuiltinTypeError Error;
10415 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10416 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10417
10418 // Tuple set/get only requires one insert/extract vector, which is
10419 // created by EmitSVETupleSetOrGet.
10420 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10421
10422 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10423 bool IsICE = ICEArguments & (1 << i);
10424 Value *Arg = EmitScalarExpr(E->getArg(i));
10425
10426 if (IsICE) {
10427 // If this is required to be a constant, constant fold it so that we know
10428 // that the generated intrinsic gets a ConstantInt.
10429 std::optional<llvm::APSInt> Result =
10430 E->getArg(i)->getIntegerConstantExpr(getContext());
10431 assert(Result && "Expected argument to be a constant");
10432
10433 // Immediates for SVE LLVM intrinsics are always 32-bit. We can safely
10434 // truncate because the immediate has been range checked and no valid
10435 // immediate requires more than a handful of bits.
10436 *Result = Result->extOrTrunc(32);
10437 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10438 continue;
10439 }
10440
10441 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10442 Ops.push_back(Arg);
10443 continue;
10444 }
10445
10446 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10447 unsigned MinElts = VTy->getMinNumElements();
10448 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10449 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10450
10451 if (N == 1) {
10452 Ops.push_back(Arg);
10453 continue;
10454 }
10455
10456 for (unsigned I = 0; I < N; ++I) {
10457 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10458 auto *NewVTy =
10459 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10460 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10461 }
10462 }
10463}
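// Worked example: an svint32x2_t argument arrives as a single
// <vscale x 8 x i32>; the loop above splits it into two <vscale x 4 x i32>
// parts with llvm.vector.extract at element offsets 0 and 4, which is the
// form the multi-vector intrinsics expect.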
10464
10465Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10466 const CallExpr *E) {
10467 llvm::Type *Ty = ConvertType(E->getType());
10468 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10469 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10470 Value *Val = EmitScalarExpr(E->getArg(0));
10471 return EmitSVEReinterpret(Val, Ty);
10472 }
10473
10474 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10475 AArch64SVEIntrinsicsProvenSorted);
10476
10477 llvm::SmallVector<Value *, 4> Ops;
10478 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10479 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10480
10481 if (TypeFlags.isLoad())
10482 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10483 TypeFlags.isZExtReturn());
10484 else if (TypeFlags.isStore())
10485 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10486 else if (TypeFlags.isGatherLoad())
10487 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10488 else if (TypeFlags.isScatterStore())
10489 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10490 else if (TypeFlags.isPrefetch())
10491 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10492 else if (TypeFlags.isGatherPrefetch())
10493 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10494 else if (TypeFlags.isStructLoad())
10495 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10496 else if (TypeFlags.isStructStore())
10497 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10498 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10499 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10500 else if (TypeFlags.isTupleCreate())
10501 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10502 else if (TypeFlags.isUndef())
10503 return UndefValue::get(Ty);
10504 else if (Builtin->LLVMIntrinsic != 0) {
10505 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10506 InsertExplicitZeroOperand(Builder, Ty, Ops);
10507
10508 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10509 InsertExplicitUndefOperand(Builder, Ty, Ops);
10510
10511 // Some ACLE builtins leave out the argument to specify the predicate
10512 // pattern, which is expected to be expanded to an SV_ALL pattern.
10513 if (TypeFlags.isAppendSVALL())
10514 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10515 if (TypeFlags.isInsertOp1SVALL())
10516 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10517
10518 // Predicates must match the main datatype.
10519 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10520 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10521 if (PredTy->getElementType()->isIntegerTy(1))
10522 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10523
10524 // Splat scalar operand to vector (intrinsics with _n infix)
10525 if (TypeFlags.hasSplatOperand()) {
10526 unsigned OpNo = TypeFlags.getSplatOperand();
10527 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10528 }
10529
10530 if (TypeFlags.isReverseCompare())
10531 std::swap(Ops[1], Ops[2]);
10532 else if (TypeFlags.isReverseUSDOT())
10533 std::swap(Ops[1], Ops[2]);
10534 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10535 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10536 std::swap(Ops[1], Ops[2]);
10537 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10538 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10539 std::swap(Ops[1], Ops[3]);
10540
10541 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10542 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10543 llvm::Type *OpndTy = Ops[1]->getType();
10544 auto *SplatZero = Constant::getNullValue(OpndTy);
10545 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10546 }
10547
10548 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10549 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10550 Value *Call = Builder.CreateCall(F, Ops);
10551
10552 // Predicate results must be converted to svbool_t.
10553 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10554 if (PredTy->getScalarType()->isIntegerTy(1))
10555 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10556
10557 return FormSVEBuiltinResult(Call);
10558 }
10559
10560 switch (BuiltinID) {
10561 default:
10562 return nullptr;
10563
10564 case SVE::BI__builtin_sve_svreinterpret_b: {
10565 auto SVCountTy =
10566 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10567 Function *CastFromSVCountF =
10568 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10569 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10570 }
10571 case SVE::BI__builtin_sve_svreinterpret_c: {
10572 auto SVCountTy =
10573 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10574 Function *CastToSVCountF =
10575 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10576 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10577 }
10578
10579 case SVE::BI__builtin_sve_svpsel_lane_b8:
10580 case SVE::BI__builtin_sve_svpsel_lane_b16:
10581 case SVE::BI__builtin_sve_svpsel_lane_b32:
10582 case SVE::BI__builtin_sve_svpsel_lane_b64:
10583 case SVE::BI__builtin_sve_svpsel_lane_c8:
10584 case SVE::BI__builtin_sve_svpsel_lane_c16:
10585 case SVE::BI__builtin_sve_svpsel_lane_c32:
10586 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10587 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10588 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10589 "aarch64.svcount")) &&
10590 "Unexpected TargetExtType");
10591 auto SVCountTy =
10592 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10593 Function *CastFromSVCountF =
10594 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10595 Function *CastToSVCountF =
10596 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10597
10598 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10599 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10600 llvm::Value *Ops0 =
10601 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10602 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10603 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10604 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10605 }
10606 case SVE::BI__builtin_sve_svmov_b_z: {
10607 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10608 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10609 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10610 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10611 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10612 }
10613
10614 case SVE::BI__builtin_sve_svnot_b_z: {
10615 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10616 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10617 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10618 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10619 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10620 }
10621
10622 case SVE::BI__builtin_sve_svmovlb_u16:
10623 case SVE::BI__builtin_sve_svmovlb_u32:
10624 case SVE::BI__builtin_sve_svmovlb_u64:
10625 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10626
10627 case SVE::BI__builtin_sve_svmovlb_s16:
10628 case SVE::BI__builtin_sve_svmovlb_s32:
10629 case SVE::BI__builtin_sve_svmovlb_s64:
10630 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10631
10632 case SVE::BI__builtin_sve_svmovlt_u16:
10633 case SVE::BI__builtin_sve_svmovlt_u32:
10634 case SVE::BI__builtin_sve_svmovlt_u64:
10635 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10636
10637 case SVE::BI__builtin_sve_svmovlt_s16:
10638 case SVE::BI__builtin_sve_svmovlt_s32:
10639 case SVE::BI__builtin_sve_svmovlt_s64:
10640 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10641
10642 case SVE::BI__builtin_sve_svpmullt_u16:
10643 case SVE::BI__builtin_sve_svpmullt_u64:
10644 case SVE::BI__builtin_sve_svpmullt_n_u16:
10645 case SVE::BI__builtin_sve_svpmullt_n_u64:
10646 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10647
10648 case SVE::BI__builtin_sve_svpmullb_u16:
10649 case SVE::BI__builtin_sve_svpmullb_u64:
10650 case SVE::BI__builtin_sve_svpmullb_n_u16:
10651 case SVE::BI__builtin_sve_svpmullb_n_u64:
10652 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10653
10654 case SVE::BI__builtin_sve_svdup_n_b8:
10655 case SVE::BI__builtin_sve_svdup_n_b16:
10656 case SVE::BI__builtin_sve_svdup_n_b32:
10657 case SVE::BI__builtin_sve_svdup_n_b64: {
10658 Value *CmpNE =
10659 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10660 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10661 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10662 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10663 }
10664
10665 case SVE::BI__builtin_sve_svdupq_n_b8:
10666 case SVE::BI__builtin_sve_svdupq_n_b16:
10667 case SVE::BI__builtin_sve_svdupq_n_b32:
10668 case SVE::BI__builtin_sve_svdupq_n_b64:
10669 case SVE::BI__builtin_sve_svdupq_n_u8:
10670 case SVE::BI__builtin_sve_svdupq_n_s8:
10671 case SVE::BI__builtin_sve_svdupq_n_u64:
10672 case SVE::BI__builtin_sve_svdupq_n_f64:
10673 case SVE::BI__builtin_sve_svdupq_n_s64:
10674 case SVE::BI__builtin_sve_svdupq_n_u16:
10675 case SVE::BI__builtin_sve_svdupq_n_f16:
10676 case SVE::BI__builtin_sve_svdupq_n_bf16:
10677 case SVE::BI__builtin_sve_svdupq_n_s16:
10678 case SVE::BI__builtin_sve_svdupq_n_u32:
10679 case SVE::BI__builtin_sve_svdupq_n_f32:
10680 case SVE::BI__builtin_sve_svdupq_n_s32: {
10681 // These builtins build a fixed-length vector from the operands, insert it into
10682 // a scalable vector, and replicate the 128-bit segment with dupq_lane.
10683 unsigned NumOpnds = Ops.size();
10684
10685 bool IsBoolTy =
10686 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10687
10688 // For svdupq_n_b* the element type is an integer of width 128/numelts,
10689 // so that the compare can use the width that is natural for the expected
10690 // number of predicate lanes.
10691 llvm::Type *EltTy = Ops[0]->getType();
10692 if (IsBoolTy)
10693 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10694
10695 SmallVector<Value *, 16> VecOps;
10696 for (unsigned I = 0; I < NumOpnds; ++I)
10697 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10698 Value *Vec = BuildVector(VecOps);
10699
10700 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10701 Value *InsertSubVec = Builder.CreateInsertVector(
10702 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10703
10704 Function *F =
10705 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10706 Value *DupQLane =
10707 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10708
10709 if (!IsBoolTy)
10710 return DupQLane;
10711
10712 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10713 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10714
10715 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10716 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10717 : Intrinsic::aarch64_sve_cmpne_wide,
10718 OverloadedTy);
10719 Value *Call = Builder.CreateCall(
10720 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10721 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10722 }
10723
10724 case SVE::BI__builtin_sve_svpfalse_b:
10725 return ConstantInt::getFalse(Ty);
10726
10727 case SVE::BI__builtin_sve_svpfalse_c: {
10728 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10729 Function *CastToSVCountF =
10730 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10731 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10732 }
10733
10734 case SVE::BI__builtin_sve_svlen_bf16:
10735 case SVE::BI__builtin_sve_svlen_f16:
10736 case SVE::BI__builtin_sve_svlen_f32:
10737 case SVE::BI__builtin_sve_svlen_f64:
10738 case SVE::BI__builtin_sve_svlen_s8:
10739 case SVE::BI__builtin_sve_svlen_s16:
10740 case SVE::BI__builtin_sve_svlen_s32:
10741 case SVE::BI__builtin_sve_svlen_s64:
10742 case SVE::BI__builtin_sve_svlen_u8:
10743 case SVE::BI__builtin_sve_svlen_u16:
10744 case SVE::BI__builtin_sve_svlen_u32:
10745 case SVE::BI__builtin_sve_svlen_u64: {
10746 SVETypeFlags TF(Builtin->TypeModifier);
10747 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10748 auto *NumEls =
10749 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10750
10751 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10752 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10753 }
10754
10755 case SVE::BI__builtin_sve_svtbl2_u8:
10756 case SVE::BI__builtin_sve_svtbl2_s8:
10757 case SVE::BI__builtin_sve_svtbl2_u16:
10758 case SVE::BI__builtin_sve_svtbl2_s16:
10759 case SVE::BI__builtin_sve_svtbl2_u32:
10760 case SVE::BI__builtin_sve_svtbl2_s32:
10761 case SVE::BI__builtin_sve_svtbl2_u64:
10762 case SVE::BI__builtin_sve_svtbl2_s64:
10763 case SVE::BI__builtin_sve_svtbl2_f16:
10764 case SVE::BI__builtin_sve_svtbl2_bf16:
10765 case SVE::BI__builtin_sve_svtbl2_f32:
10766 case SVE::BI__builtin_sve_svtbl2_f64: {
10767 SVETypeFlags TF(Builtin->TypeModifier);
10768 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10769 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10770 return Builder.CreateCall(F, Ops);
10771 }
10772
10773 case SVE::BI__builtin_sve_svset_neonq_s8:
10774 case SVE::BI__builtin_sve_svset_neonq_s16:
10775 case SVE::BI__builtin_sve_svset_neonq_s32:
10776 case SVE::BI__builtin_sve_svset_neonq_s64:
10777 case SVE::BI__builtin_sve_svset_neonq_u8:
10778 case SVE::BI__builtin_sve_svset_neonq_u16:
10779 case SVE::BI__builtin_sve_svset_neonq_u32:
10780 case SVE::BI__builtin_sve_svset_neonq_u64:
10781 case SVE::BI__builtin_sve_svset_neonq_f16:
10782 case SVE::BI__builtin_sve_svset_neonq_f32:
10783 case SVE::BI__builtin_sve_svset_neonq_f64:
10784 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10785 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10786 }
10787
10788 case SVE::BI__builtin_sve_svget_neonq_s8:
10789 case SVE::BI__builtin_sve_svget_neonq_s16:
10790 case SVE::BI__builtin_sve_svget_neonq_s32:
10791 case SVE::BI__builtin_sve_svget_neonq_s64:
10792 case SVE::BI__builtin_sve_svget_neonq_u8:
10793 case SVE::BI__builtin_sve_svget_neonq_u16:
10794 case SVE::BI__builtin_sve_svget_neonq_u32:
10795 case SVE::BI__builtin_sve_svget_neonq_u64:
10796 case SVE::BI__builtin_sve_svget_neonq_f16:
10797 case SVE::BI__builtin_sve_svget_neonq_f32:
10798 case SVE::BI__builtin_sve_svget_neonq_f64:
10799 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10800 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10801 }
10802
10803 case SVE::BI__builtin_sve_svdup_neonq_s8:
10804 case SVE::BI__builtin_sve_svdup_neonq_s16:
10805 case SVE::BI__builtin_sve_svdup_neonq_s32:
10806 case SVE::BI__builtin_sve_svdup_neonq_s64:
10807 case SVE::BI__builtin_sve_svdup_neonq_u8:
10808 case SVE::BI__builtin_sve_svdup_neonq_u16:
10809 case SVE::BI__builtin_sve_svdup_neonq_u32:
10810 case SVE::BI__builtin_sve_svdup_neonq_u64:
10811 case SVE::BI__builtin_sve_svdup_neonq_f16:
10812 case SVE::BI__builtin_sve_svdup_neonq_f32:
10813 case SVE::BI__builtin_sve_svdup_neonq_f64:
10814 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10815 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10816 Builder.getInt64(0));
10817 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10818 {Insert, Builder.getInt64(0)});
10819 }
10820 }
10821
10822 /// Should not happen
10823 return nullptr;
10824}
10825
10826static void swapCommutativeSMEOperands(unsigned BuiltinID,
10827 SmallVectorImpl<Value *> &Ops) {
10828 unsigned MultiVec;
10829 switch (BuiltinID) {
10830 default:
10831 return;
10832 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10833 MultiVec = 1;
10834 break;
10835 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10836 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10837 MultiVec = 2;
10838 break;
10839 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10840 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10841 MultiVec = 4;
10842 break;
10843 }
10844
10845 if (MultiVec > 0)
10846 for (unsigned I = 0; I < MultiVec; ++I)
10847 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10848}
10849
10850Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10851 const CallExpr *E) {
10852 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10853 AArch64SMEIntrinsicsProvenSorted);
10854
10855 llvm::SmallVector<Value *, 4> Ops;
10856 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10857 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10858
10859 if (TypeFlags.isLoad() || TypeFlags.isStore())
10860 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10861 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10862 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10863 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10864 BuiltinID == SME::BI__builtin_sme_svzero_za)
10865 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10866 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10867 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10868 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10869 BuiltinID == SME::BI__builtin_sme_svstr_za)
10870 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10871
10872 // Handle builtins which require their multi-vector operands to be swapped
10873 swapCommutativeSMEOperands(BuiltinID, Ops);
10874
10875 // Should not happen!
10876 if (Builtin->LLVMIntrinsic == 0)
10877 return nullptr;
10878
10879 // Predicates must match the main datatype.
10880 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10881 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10882 if (PredTy->getElementType()->isIntegerTy(1))
10883 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10884
10885 Function *F =
10886 TypeFlags.isOverloadNone()
10887 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10888 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10889 Value *Call = Builder.CreateCall(F, Ops);
10890
10891 return FormSVEBuiltinResult(Call);
10892}
10893
10894Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10895 const CallExpr *E,
10896 llvm::Triple::ArchType Arch) {
10897 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10898 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10899 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10900
10901 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10902 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10903 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10904
10905 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10906 return EmitAArch64CpuSupports(E);
10907
10908 unsigned HintID = static_cast<unsigned>(-1);
10909 switch (BuiltinID) {
10910 default: break;
10911 case clang::AArch64::BI__builtin_arm_nop:
10912 HintID = 0;
10913 break;
10914 case clang::AArch64::BI__builtin_arm_yield:
10915 case clang::AArch64::BI__yield:
10916 HintID = 1;
10917 break;
10918 case clang::AArch64::BI__builtin_arm_wfe:
10919 case clang::AArch64::BI__wfe:
10920 HintID = 2;
10921 break;
10922 case clang::AArch64::BI__builtin_arm_wfi:
10923 case clang::AArch64::BI__wfi:
10924 HintID = 3;
10925 break;
10926 case clang::AArch64::BI__builtin_arm_sev:
10927 case clang::AArch64::BI__sev:
10928 HintID = 4;
10929 break;
10930 case clang::AArch64::BI__builtin_arm_sevl:
10931 case clang::AArch64::BI__sevl:
10932 HintID = 5;
10933 break;
10934 }
10935
10936 if (HintID != static_cast<unsigned>(-1)) {
10937 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10938 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10939 }
10940
10941 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10942 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10943 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10944 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10945 }
10946
10947 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10948 // Create call to __arm_sme_state and store the results to the two pointers.
10949 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10950 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10951 false),
10952 "__arm_sme_state"));
10953 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10954 "aarch64_pstate_sm_compatible");
10955 CI->setAttributes(Attrs);
10956 CI->setCallingConv(
10957 llvm::CallingConv::
10958 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10959 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10960 EmitPointerWithAlignment(E->getArg(0)));
10961 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10962 EmitPointerWithAlignment(E->getArg(1)));
10963 }
10964
10965 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10966 assert((getContext().getTypeSize(E->getType()) == 32) &&
10967 "rbit of unusual size!");
10968 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10969 return Builder.CreateCall(
10970 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10971 }
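// Worked example: __builtin_arm_rbit(x) lowers to
//   call i32 @llvm.bitreverse.i32(i32 %x)
// which the AArch64 backend selects to the RBIT instruction.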
10972 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10973 assert((getContext().getTypeSize(E->getType()) == 64) &&
10974 "rbit of unusual size!");
10975 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10976 return Builder.CreateCall(
10977 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10978 }
10979
10980 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10981 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10982 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10983 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10984 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10985 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10986 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10987 return Res;
10988 }
10989
10990 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10991 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10992 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10993 "cls");
10994 }
10995 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10996 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10998 "cls");
10999 }
11000
11001 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11002 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11003 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11004 llvm::Type *Ty = Arg->getType();
11005 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11006 Arg, "frint32z");
11007 }
11008
11009 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11010 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11011 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11012 llvm::Type *Ty = Arg->getType();
11013 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11014 Arg, "frint64z");
11015 }
11016
11017 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11018 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11019 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11020 llvm::Type *Ty = Arg->getType();
11021 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11022 Arg, "frint32x");
11023 }
11024
11025 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11026 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11027 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11028 llvm::Type *Ty = Arg->getType();
11029 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11030 Arg, "frint64x");
11031 }
11032
11033 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11034 assert((getContext().getTypeSize(E->getType()) == 32) &&
11035 "__jcvt of unusual size!");
11036 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11037 return Builder.CreateCall(
11038 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11039 }
11040
11041 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11042 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11043 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11044 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11045 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11046 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11047
11048 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11049 // Load from the address via an LLVM intrinsic, receiving a
11050 // tuple of 8 i64 words, and store each one to ValPtr.
11051 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11052 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11053 llvm::Value *ToRet;
11054 for (size_t i = 0; i < 8; i++) {
11055 llvm::Value *ValOffsetPtr =
11056 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11057 Address Addr =
11058 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11059 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11060 }
11061 return ToRet;
11062 } else {
11063 // Load 8 i64 words from ValPtr, and store them to the address
11064 // via an LLVM intrinsic.
11065 SmallVector<llvm::Value *, 9> Args;
11066 Args.push_back(MemAddr);
11067 for (size_t i = 0; i < 8; i++) {
11068 llvm::Value *ValOffsetPtr =
11069 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11070 Address Addr =
11071 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11072 Args.push_back(Builder.CreateLoad(Addr));
11073 }
11074
11075 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11076 ? Intrinsic::aarch64_st64b
11077 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11078 ? Intrinsic::aarch64_st64bv
11079 : Intrinsic::aarch64_st64bv0);
11080 Function *F = CGM.getIntrinsic(Intr);
11081 return Builder.CreateCall(F, Args);
11082 }
11083 }
11084
11085 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11086 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11087
11088 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11089 ? Intrinsic::aarch64_rndr
11090 : Intrinsic::aarch64_rndrrs);
11091 Function *F = CGM.getIntrinsic(Intr);
11092 llvm::Value *Val = Builder.CreateCall(F);
11093 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11094 Value *Status = Builder.CreateExtractValue(Val, 1);
11095
11096 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11097 Builder.CreateStore(RandomValue, MemAddress);
11098 Status = Builder.CreateZExt(Status, Int32Ty);
11099 return Status;
11100 }
11101
11102 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11103 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11104 const FunctionDecl *FD = E->getDirectCallee();
11105 Value *Ops[2];
11106 for (unsigned i = 0; i < 2; i++)
11107 Ops[i] = EmitScalarExpr(E->getArg(i));
11108 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11109 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11110 StringRef Name = FD->getName();
11111 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11112 }
11113
11114 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11115 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11116 getContext().getTypeSize(E->getType()) == 128) {
11117 Function *F =
11118 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11119 ? Intrinsic::aarch64_ldaxp
11120 : Intrinsic::aarch64_ldxp);
11121
11122 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11123 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11124
11125 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11126 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11127 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11128 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11129 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11130
11131 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11132 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11133 Val = Builder.CreateOr(Val, Val1);
11134 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11135 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11136 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11137 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11138
11139 QualType Ty = E->getType();
11140 llvm::Type *RealResTy = ConvertType(Ty);
11141 llvm::Type *IntTy =
11142 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11143
11144 Function *F =
11145 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11146 ? Intrinsic::aarch64_ldaxr
11147 : Intrinsic::aarch64_ldxr,
11148 UnqualPtrTy);
11149 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11150 Val->addParamAttr(
11151 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11152
11153 if (RealResTy->isPointerTy())
11154 return Builder.CreateIntToPtr(Val, RealResTy);
11155
11156 llvm::Type *IntResTy = llvm::IntegerType::get(
11157 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11158 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11159 RealResTy);
11160 }
11161
11162 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11163 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11164 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11165 Function *F =
11166 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11167 ? Intrinsic::aarch64_stlxp
11168 : Intrinsic::aarch64_stxp);
11169 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11170
11171 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11172 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11173
11174 Tmp = Tmp.withElementType(STy);
11175 llvm::Value *Val = Builder.CreateLoad(Tmp);
11176
11177 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11178 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11179 Value *StPtr = EmitScalarExpr(E->getArg(1));
11180 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11181 }
11182
11183 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11184 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11185 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11186 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11187
11188 QualType Ty = E->getArg(0)->getType();
11189 llvm::Type *StoreTy =
11190 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11191
11192 if (StoreVal->getType()->isPointerTy())
11193 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11194 else {
11195 llvm::Type *IntTy = llvm::IntegerType::get(
11196 getLLVMContext(),
11197 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11198 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11199 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11200 }
11201
11202 Function *F =
11203 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11204 ? Intrinsic::aarch64_stlxr
11205 : Intrinsic::aarch64_stxr,
11206 StoreAddr->getType());
11207 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11208 CI->addParamAttr(
11209 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11210 return CI;
11211 }
11212
11213 if (BuiltinID == clang::AArch64::BI__getReg) {
11214 Expr::EvalResult Result;
11215 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11216 llvm_unreachable("Sema will ensure that the parameter is constant");
11217
11218 llvm::APSInt Value = Result.Val.getInt();
11219 LLVMContext &Context = CGM.getLLVMContext();
11220 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11221
11222 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11223 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11224 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11225
11226 llvm::Function *F =
11227 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11228 return Builder.CreateCall(F, Metadata);
11229 }
11230
11231 if (BuiltinID == clang::AArch64::BI__break) {
11232 Expr::EvalResult Result;
11233 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11234 llvm_unreachable("Sema will ensure that the parameter is constant");
11235
11236 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11237 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11238 }
11239
11240 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11241 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11242 return Builder.CreateCall(F);
11243 }
11244
11245 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11246 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11247 llvm::SyncScope::SingleThread);
11248
11249 // CRC32
11250 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11251 switch (BuiltinID) {
11252 case clang::AArch64::BI__builtin_arm_crc32b:
11253 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11254 case clang::AArch64::BI__builtin_arm_crc32cb:
11255 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11256 case clang::AArch64::BI__builtin_arm_crc32h:
11257 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11258 case clang::AArch64::BI__builtin_arm_crc32ch:
11259 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11260 case clang::AArch64::BI__builtin_arm_crc32w:
11261 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11262 case clang::AArch64::BI__builtin_arm_crc32cw:
11263 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11264 case clang::AArch64::BI__builtin_arm_crc32d:
11265 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11266 case clang::AArch64::BI__builtin_arm_crc32cd:
11267 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11268 }
11269
11270 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11271 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11272 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11273 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11274
11275 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11276 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11277
11278 return Builder.CreateCall(F, {Arg0, Arg1});
11279 }
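// The 64-bit CRC builtins (crc32d/crc32cd) map to the crc32x/crc32cx
// intrinsics above, and the data operand is zero-extended or bitcast to the
// width the selected intrinsic expects.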
11280
11281 // Memory Operations (MOPS)
11282 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11283 Value *Dst = EmitScalarExpr(E->getArg(0));
11284 Value *Val = EmitScalarExpr(E->getArg(1));
11285 Value *Size = EmitScalarExpr(E->getArg(2));
11286 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11287 Val = Builder.CreateTrunc(Val, Int8Ty);
11288 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11289 return Builder.CreateCall(
11290 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11291 }
11292
11293 // Memory Tagging Extensions (MTE) Intrinsics
11294 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11295 switch (BuiltinID) {
11296 case clang::AArch64::BI__builtin_arm_irg:
11297 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11298 case clang::AArch64::BI__builtin_arm_addg:
11299 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11300 case clang::AArch64::BI__builtin_arm_gmi:
11301 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11302 case clang::AArch64::BI__builtin_arm_ldg:
11303 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11304 case clang::AArch64::BI__builtin_arm_stg:
11305 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11306 case clang::AArch64::BI__builtin_arm_subp:
11307 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11308 }
11309
11310 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11311 llvm::Type *T = ConvertType(E->getType());
11312
11313 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11314 Value *Pointer = EmitScalarExpr(E->getArg(0));
11315 Value *Mask = EmitScalarExpr(E->getArg(1));
11316
11317 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11318 Mask = Builder.CreateZExt(Mask, Int64Ty);
11319 Value *RV = Builder.CreateCall(
11320 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11321 return Builder.CreatePointerCast(RV, T);
11322 }
11323 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11324 Value *Pointer = EmitScalarExpr(E->getArg(0));
11325 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11326
11327 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11328 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11329 Value *RV = Builder.CreateCall(
11330 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11331 return Builder.CreatePointerCast(RV, T);
11332 }
11333 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11334 Value *Pointer = EmitScalarExpr(E->getArg(0));
11335 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11336
11337 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11338 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11339 return Builder.CreateCall(
11340 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11341 }
11342 // Although it is possible to supply a different return
11343 // address (first arg) to this intrinsic, for now we set the
11344 // return address to be the same as the input address.
11345 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11346 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11347 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11348 Value *RV = Builder.CreateCall(
11349 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11350 return Builder.CreatePointerCast(RV, T);
11351 }
11352 // Although it is possible to supply a different tag (to set)
11353 // to this intrinsic (as first arg), for now we supply
11354 // the tag that is in the input address argument (the common use case).
11355 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11356 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11357 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11358 return Builder.CreateCall(
11359 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11360 }
11361 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11362 Value *PointerA = EmitScalarExpr(E->getArg(0));
11363 Value *PointerB = EmitScalarExpr(E->getArg(1));
11364 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11365 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11366 return Builder.CreateCall(
11367 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11368 }
11369 }
11370
11371 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11372 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11373 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11374 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11375 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11376 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11377 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11378 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11379
11380 SpecialRegisterAccessKind AccessKind = Write;
11381 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11382 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11383 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11384 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11385 AccessKind = VolatileRead;
11386
11387 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11388 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11389
11390 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11391 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11392
11393 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11394 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11395
11396 llvm::Type *ValueType;
11397 llvm::Type *RegisterType = Int64Ty;
11398 if (Is32Bit) {
11399 ValueType = Int32Ty;
11400 } else if (Is128Bit) {
11401 llvm::Type *Int128Ty =
11402 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11403 ValueType = Int128Ty;
11404 RegisterType = Int128Ty;
11405 } else if (IsPointerBuiltin) {
11406 ValueType = VoidPtrTy;
11407 } else {
11408 ValueType = Int64Ty;
11409 };
11410
11411 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11412 AccessKind);
11413 }
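// For the 32-bit and pointer forms the underlying MRS/MSR access still uses a
// 64-bit register (RegisterType remains i64); only the 128-bit forms switch
// both the register and value types to i128. EmitSpecialRegisterBuiltin
// inserts the conversions between ValueType and RegisterType.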
11414
11415 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11416 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11417 LLVMContext &Context = CGM.getLLVMContext();
11418
11419 unsigned SysReg =
11420 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11421
11422 std::string SysRegStr;
11423 llvm::raw_string_ostream(SysRegStr) <<
11424 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11425 ((SysReg >> 11) & 7) << ":" <<
11426 ((SysReg >> 7) & 15) << ":" <<
11427 ((SysReg >> 3) & 15) << ":" <<
11428 ( SysReg & 7);
11429
11430 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11431 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11432 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11433
11434 llvm::Type *RegisterType = Int64Ty;
11435 llvm::Type *Types[] = { RegisterType };
11436
11437 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11438 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11439
11440 return Builder.CreateCall(F, Metadata);
11441 }
11442
11443 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11444 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11445
11446 return Builder.CreateCall(F, { Metadata, ArgValue });
11447 }
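// The immediate is decoded into the "op0:op1:CRn:CRm:op2" string expected by
// read_register/write_register: op0 is 2 | bit 14, and the remaining fields
// come from bits 13:11, 10:7, 6:3 and 2:0. For example, an argument of 0x5F02
// (CNTVCT_EL0, i.e. S3_3_C14_C0_2) yields the register string "3:3:14:0:2".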
11448
11449 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11450 llvm::Function *F =
11451 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11452 return Builder.CreateCall(F);
11453 }
11454
11455 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11456 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11457 return Builder.CreateCall(F);
11458 }
11459
11460 if (BuiltinID == clang::AArch64::BI__mulh ||
11461 BuiltinID == clang::AArch64::BI__umulh) {
11462 llvm::Type *ResType = ConvertType(E->getType());
11463 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11464
11465 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11466 Value *LHS =
11467 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11468 Value *RHS =
11469 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11470
11471 Value *MulResult, *HigherBits;
11472 if (IsSigned) {
11473 MulResult = Builder.CreateNSWMul(LHS, RHS);
11474 HigherBits = Builder.CreateAShr(MulResult, 64);
11475 } else {
11476 MulResult = Builder.CreateNUWMul(LHS, RHS);
11477 HigherBits = Builder.CreateLShr(MulResult, 64);
11478 }
11479 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11480
11481 return HigherBits;
11482 }
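// __mulh/__umulh widen both operands to i128, multiply, and return the upper
// 64 bits of the product; e.g. __umulh(0xFFFFFFFFFFFFFFFFULL, 2) yields 1.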
11483
11484 if (BuiltinID == AArch64::BI__writex18byte ||
11485 BuiltinID == AArch64::BI__writex18word ||
11486 BuiltinID == AArch64::BI__writex18dword ||
11487 BuiltinID == AArch64::BI__writex18qword) {
11488 // Read x18 as i8*
11489 LLVMContext &Context = CGM.getLLVMContext();
11490 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11491 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11492 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11493 llvm::Function *F =
11494 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11495 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11496 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11497
11498 // Store val at x18 + offset
11499 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11500 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11501 Value *Val = EmitScalarExpr(E->getArg(1));
11502 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11503 return Store;
11504 }
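// The __writex18* family treats x18 (the platform register, typically the TEB
// pointer on Windows on Arm) as a base pointer and performs a byte-aligned
// store of the value at x18 + offset; the __readx18* builtins below mirror
// this with a byte-aligned load.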
11505
11506 if (BuiltinID == AArch64::BI__readx18byte ||
11507 BuiltinID == AArch64::BI__readx18word ||
11508 BuiltinID == AArch64::BI__readx18dword ||
11509 BuiltinID == AArch64::BI__readx18qword) {
11510 llvm::Type *IntTy = ConvertType(E->getType());
11511
11512 // Read x18 as i8*
11513 LLVMContext &Context = CGM.getLLVMContext();
11514 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11515 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11516 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11517 llvm::Function *F =
11518 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11519 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11520 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11521
11522 // Load x18 + offset
11523 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11524 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11525 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11526 return Load;
11527 }
11528
11529 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11530 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11531 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11532 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11533 Value *Arg = EmitScalarExpr(E->getArg(0));
11534 llvm::Type *RetTy = ConvertType(E->getType());
11535 return Builder.CreateBitCast(Arg, RetTy);
11536 }
11537
11538 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11539 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11540 BuiltinID == AArch64::BI_CountLeadingZeros ||
11541 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11542 Value *Arg = EmitScalarExpr(E->getArg(0));
11543 llvm::Type *ArgType = Arg->getType();
11544
11545 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11546 BuiltinID == AArch64::BI_CountLeadingOnes64)
11547 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11548
11549 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11550 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11551
11552 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11553 BuiltinID == AArch64::BI_CountLeadingZeros64)
11554 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11555 return Result;
11556 }
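// Leading-ones counts are implemented by inverting the input and counting
// leading zeros, e.g. _CountLeadingOnes(0xFFFF0000) == 16; the 64-bit
// variants truncate the i64 ctlz result to the 32-bit return type.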
11557
11558 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11559 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11560 Value *Arg = EmitScalarExpr(E->getArg(0));
11561
11562 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11563 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11564 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11565
11566 Value *Result = Builder.CreateCall(F, Arg, "cls");
11567 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11568 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11569 return Result;
11570 }
11571
11572 if (BuiltinID == AArch64::BI_CountOneBits ||
11573 BuiltinID == AArch64::BI_CountOneBits64) {
11574 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11575 llvm::Type *ArgType = ArgValue->getType();
11576 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11577
11578 Value *Result = Builder.CreateCall(F, ArgValue);
11579 if (BuiltinID == AArch64::BI_CountOneBits64)
11580 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11581 return Result;
11582 }
11583
11584 if (BuiltinID == AArch64::BI__prefetch) {
11585 Value *Address = EmitScalarExpr(E->getArg(0));
11586 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11587 Value *Locality = ConstantInt::get(Int32Ty, 3);
11588 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11589 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11590 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11591 }
11592
11593 if (BuiltinID == AArch64::BI__hlt) {
11594 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11595 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11596
11597 // Return 0 for convenience, even though MSVC returns some other undefined
11598 // value.
11599 return ConstantInt::get(Builder.getInt32Ty(), 0);
11600 }
11601
11602 // Handle MSVC intrinsics before argument evaluation to prevent double
11603 // evaluation.
11604 if (std::optional<MSVCIntrin> MsvcIntId =
11605 translateAarch64ToMsvcIntrin(BuiltinID))
11606 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11607
11608 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
11609 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11610 return P.first == BuiltinID;
11611 });
11612 if (It != end(NEONEquivalentIntrinsicMap))
11613 BuiltinID = It->second;
11614
11615 // Find out if any arguments are required to be integer constant
11616 // expressions.
11617 unsigned ICEArguments = 0;
11618 ASTContext::GetBuiltinTypeError Error;
11619 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11620 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11621
11622 llvm::SmallVector<Value*, 4> Ops;
11623 Address PtrOp0 = Address::invalid();
11624 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11625 if (i == 0) {
11626 switch (BuiltinID) {
11627 case NEON::BI__builtin_neon_vld1_v:
11628 case NEON::BI__builtin_neon_vld1q_v:
11629 case NEON::BI__builtin_neon_vld1_dup_v:
11630 case NEON::BI__builtin_neon_vld1q_dup_v:
11631 case NEON::BI__builtin_neon_vld1_lane_v:
11632 case NEON::BI__builtin_neon_vld1q_lane_v:
11633 case NEON::BI__builtin_neon_vst1_v:
11634 case NEON::BI__builtin_neon_vst1q_v:
11635 case NEON::BI__builtin_neon_vst1_lane_v:
11636 case NEON::BI__builtin_neon_vst1q_lane_v:
11637 case NEON::BI__builtin_neon_vldap1_lane_s64:
11638 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11639 case NEON::BI__builtin_neon_vstl1_lane_s64:
11640 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11641 // Get the alignment for the argument in addition to the value;
11642 // we'll use it later.
11643 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11644 Ops.push_back(PtrOp0.emitRawPointer(*this));
11645 continue;
11646 }
11647 }
11648 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11649 }
11650
11651 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11652 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11653 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11654
11655 if (Builtin) {
11656 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11657 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11658 assert(Result && "SISD intrinsic should have been handled");
11659 return Result;
11660 }
11661
11662 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11663 NeonTypeFlags Type(0);
11664 if (std::optional<llvm::APSInt> Result =
11665 Arg->getIntegerConstantExpr(getContext()))
11666 // Determine the type of this overloaded NEON intrinsic.
11667 Type = NeonTypeFlags(Result->getZExtValue());
11668
11669 bool usgn = Type.isUnsigned();
11670 bool quad = Type.isQuad();
11671
11672 // Handle non-overloaded intrinsics first.
11673 switch (BuiltinID) {
11674 default: break;
11675 case NEON::BI__builtin_neon_vabsh_f16:
11676 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11677 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11678 case NEON::BI__builtin_neon_vaddq_p128: {
11679 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11680 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11681 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11682 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11683 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11684 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11685 return Builder.CreateBitCast(Ops[0], Int128Ty);
11686 }
11687 case NEON::BI__builtin_neon_vldrq_p128: {
11688 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11689 Value *Ptr = EmitScalarExpr(E->getArg(0));
11690 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11691 CharUnits::fromQuantity(16));
11692 }
11693 case NEON::BI__builtin_neon_vstrq_p128: {
11694 Value *Ptr = Ops[0];
11695 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11696 }
11697 case NEON::BI__builtin_neon_vcvts_f32_u32:
11698 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11699 usgn = true;
11700 [[fallthrough]];
11701 case NEON::BI__builtin_neon_vcvts_f32_s32:
11702 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11703 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11704 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11705 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11706 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11707 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11708 if (usgn)
11709 return Builder.CreateUIToFP(Ops[0], FTy);
11710 return Builder.CreateSIToFP(Ops[0], FTy);
11711 }
11712 case NEON::BI__builtin_neon_vcvth_f16_u16:
11713 case NEON::BI__builtin_neon_vcvth_f16_u32:
11714 case NEON::BI__builtin_neon_vcvth_f16_u64:
11715 usgn = true;
11716 [[fallthrough]];
11717 case NEON::BI__builtin_neon_vcvth_f16_s16:
11718 case NEON::BI__builtin_neon_vcvth_f16_s32:
11719 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11720 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11721 llvm::Type *FTy = HalfTy;
11722 llvm::Type *InTy;
11723 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11724 InTy = Int64Ty;
11725 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11726 InTy = Int32Ty;
11727 else
11728 InTy = Int16Ty;
11729 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11730 if (usgn)
11731 return Builder.CreateUIToFP(Ops[0], FTy);
11732 return Builder.CreateSIToFP(Ops[0], FTy);
11733 }
11734 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11735 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11736 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11737 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11738 case NEON::BI__builtin_neon_vcvth_u16_f16:
11739 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11740 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11741 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11742 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11743 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11744 unsigned Int;
11745 llvm::Type* InTy = Int32Ty;
11746 llvm::Type* FTy = HalfTy;
11747 llvm::Type *Tys[2] = {InTy, FTy};
11748 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11749 switch (BuiltinID) {
11750 default: llvm_unreachable("missing builtin ID in switch!");
11751 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11752 Int = Intrinsic::aarch64_neon_fcvtau; break;
11753 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11754 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11755 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11756 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11757 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11758 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11759 case NEON::BI__builtin_neon_vcvth_u16_f16:
11760 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11761 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11762 Int = Intrinsic::aarch64_neon_fcvtas; break;
11763 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11764 Int = Intrinsic::aarch64_neon_fcvtms; break;
11765 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11766 Int = Intrinsic::aarch64_neon_fcvtns; break;
11767 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11768 Int = Intrinsic::aarch64_neon_fcvtps; break;
11769 case NEON::BI__builtin_neon_vcvth_s16_f16:
11770 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11771 }
11772 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11773 return Builder.CreateTrunc(Ops[0], Int16Ty);
11774 }
11775 case NEON::BI__builtin_neon_vcaleh_f16:
11776 case NEON::BI__builtin_neon_vcalth_f16:
11777 case NEON::BI__builtin_neon_vcageh_f16:
11778 case NEON::BI__builtin_neon_vcagth_f16: {
11779 unsigned Int;
11780 llvm::Type* InTy = Int32Ty;
11781 llvm::Type* FTy = HalfTy;
11782 llvm::Type *Tys[2] = {InTy, FTy};
11783 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11784 switch (BuiltinID) {
11785 default: llvm_unreachable("missing builtin ID in switch!");
11786 case NEON::BI__builtin_neon_vcageh_f16:
11787 Int = Intrinsic::aarch64_neon_facge; break;
11788 case NEON::BI__builtin_neon_vcagth_f16:
11789 Int = Intrinsic::aarch64_neon_facgt; break;
11790 case NEON::BI__builtin_neon_vcaleh_f16:
11791 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11792 case NEON::BI__builtin_neon_vcalth_f16:
11793 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11794 }
11795 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11796 return Builder.CreateTrunc(Ops[0], Int16Ty);
11797 }
11798 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11799 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11800 unsigned Int;
11801 llvm::Type* InTy = Int32Ty;
11802 llvm::Type* FTy = HalfTy;
11803 llvm::Type *Tys[2] = {InTy, FTy};
11804 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11805 switch (BuiltinID) {
11806 default: llvm_unreachable("missing builtin ID in switch!");
11807 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11808 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11809 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11810 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11811 }
11812 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11813 return Builder.CreateTrunc(Ops[0], Int16Ty);
11814 }
11815 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11816 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11817 unsigned Int;
11818 llvm::Type* FTy = HalfTy;
11819 llvm::Type* InTy = Int32Ty;
11820 llvm::Type *Tys[2] = {FTy, InTy};
11821 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11822 switch (BuiltinID) {
11823 default: llvm_unreachable("missing builtin ID in switch!");
11824 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11825 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11826 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11827 break;
11828 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11829 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11830 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11831 break;
11832 }
11833 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11834 }
11835 case NEON::BI__builtin_neon_vpaddd_s64: {
11836 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11837 Value *Vec = EmitScalarExpr(E->getArg(0));
11838 // The vector is v2i64, so make sure it's bitcast to that.
11839 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11840 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11841 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11842 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11843 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11844 // Pairwise addition of a v2i64 into a scalar i64.
11845 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11846 }
11847 case NEON::BI__builtin_neon_vpaddd_f64: {
11848 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11849 Value *Vec = EmitScalarExpr(E->getArg(0));
11850 // The vector is v2f64, so make sure it's bitcast to that.
11851 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11852 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11853 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11854 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11855 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11856 // Pairwise addition of a v2f64 into a scalar f64.
11857 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11858 }
11859 case NEON::BI__builtin_neon_vpadds_f32: {
11860 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11861 Value *Vec = EmitScalarExpr(E->getArg(0));
11862 // The vector is v2f32, so make sure it's bitcast to that.
11863 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11864 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11865 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11866 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11867 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11868 // Pairwise addition of a v2f32 into a scalar f32.
11869 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11870 }
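// Each of the three scalar pairwise additions above reduces a two-element
// vector by extracting lane 0 and lane 1 and adding them, so e.g.
// vpaddd_f64(v) computes v[0] + v[1].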
11871 case NEON::BI__builtin_neon_vceqzd_s64:
11872 case NEON::BI__builtin_neon_vceqzd_f64:
11873 case NEON::BI__builtin_neon_vceqzs_f32:
11874 case NEON::BI__builtin_neon_vceqzh_f16:
11875 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11876 return EmitAArch64CompareBuiltinExpr(
11877 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11878 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11879 case NEON::BI__builtin_neon_vcgezd_s64:
11880 case NEON::BI__builtin_neon_vcgezd_f64:
11881 case NEON::BI__builtin_neon_vcgezs_f32:
11882 case NEON::BI__builtin_neon_vcgezh_f16:
11883 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11884 return EmitAArch64CompareBuiltinExpr(
11885 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11886 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11887 case NEON::BI__builtin_neon_vclezd_s64:
11888 case NEON::BI__builtin_neon_vclezd_f64:
11889 case NEON::BI__builtin_neon_vclezs_f32:
11890 case NEON::BI__builtin_neon_vclezh_f16:
11891 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11892 return EmitAArch64CompareBuiltinExpr(
11893 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11894 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11895 case NEON::BI__builtin_neon_vcgtzd_s64:
11896 case NEON::BI__builtin_neon_vcgtzd_f64:
11897 case NEON::BI__builtin_neon_vcgtzs_f32:
11898 case NEON::BI__builtin_neon_vcgtzh_f16:
11899 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11900 return EmitAArch64CompareBuiltinExpr(
11901 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11902 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11903 case NEON::BI__builtin_neon_vcltzd_s64:
11904 case NEON::BI__builtin_neon_vcltzd_f64:
11905 case NEON::BI__builtin_neon_vcltzs_f32:
11906 case NEON::BI__builtin_neon_vcltzh_f16:
11907 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11908 return EmitAArch64CompareBuiltinExpr(
11909 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11910 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11911
11912 case NEON::BI__builtin_neon_vceqzd_u64: {
11913 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11914 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11915 Ops[0] =
11916 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11917 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11918 }
11919 case NEON::BI__builtin_neon_vceqd_f64:
11920 case NEON::BI__builtin_neon_vcled_f64:
11921 case NEON::BI__builtin_neon_vcltd_f64:
11922 case NEON::BI__builtin_neon_vcged_f64:
11923 case NEON::BI__builtin_neon_vcgtd_f64: {
11924 llvm::CmpInst::Predicate P;
11925 switch (BuiltinID) {
11926 default: llvm_unreachable("missing builtin ID in switch!");
11927 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11928 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11929 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11930 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11931 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11932 }
11933 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11934 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11935 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11936 if (P == llvm::FCmpInst::FCMP_OEQ)
11937 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11938 else
11939 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11940 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11941 }
11942 case NEON::BI__builtin_neon_vceqs_f32:
11943 case NEON::BI__builtin_neon_vcles_f32:
11944 case NEON::BI__builtin_neon_vclts_f32:
11945 case NEON::BI__builtin_neon_vcges_f32:
11946 case NEON::BI__builtin_neon_vcgts_f32: {
11947 llvm::CmpInst::Predicate P;
11948 switch (BuiltinID) {
11949 default: llvm_unreachable("missing builtin ID in switch!");
11950 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11951 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11952 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11953 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11954 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11955 }
11956 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11957 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11958 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11959 if (P == llvm::FCmpInst::FCMP_OEQ)
11960 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11961 else
11962 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11963 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11964 }
11965 case NEON::BI__builtin_neon_vceqh_f16:
11966 case NEON::BI__builtin_neon_vcleh_f16:
11967 case NEON::BI__builtin_neon_vclth_f16:
11968 case NEON::BI__builtin_neon_vcgeh_f16:
11969 case NEON::BI__builtin_neon_vcgth_f16: {
11970 llvm::CmpInst::Predicate P;
11971 switch (BuiltinID) {
11972 default: llvm_unreachable("missing builtin ID in switch!");
11973 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11974 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11975 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11976 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11977 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11978 }
11979 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11980 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11981 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11982 if (P == llvm::FCmpInst::FCMP_OEQ)
11983 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11984 else
11985 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11986 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11987 }
11988 case NEON::BI__builtin_neon_vceqd_s64:
11989 case NEON::BI__builtin_neon_vceqd_u64:
11990 case NEON::BI__builtin_neon_vcgtd_s64:
11991 case NEON::BI__builtin_neon_vcgtd_u64:
11992 case NEON::BI__builtin_neon_vcltd_s64:
11993 case NEON::BI__builtin_neon_vcltd_u64:
11994 case NEON::BI__builtin_neon_vcged_u64:
11995 case NEON::BI__builtin_neon_vcged_s64:
11996 case NEON::BI__builtin_neon_vcled_u64:
11997 case NEON::BI__builtin_neon_vcled_s64: {
11998 llvm::CmpInst::Predicate P;
11999 switch (BuiltinID) {
12000 default: llvm_unreachable("missing builtin ID in switch!");
12001 case NEON::BI__builtin_neon_vceqd_s64:
12002 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12003 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12004 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12005 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12006 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12007 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12008 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12009 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12010 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12011 }
12012 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12013 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12014 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12015 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12016 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12017 }
12018 case NEON::BI__builtin_neon_vtstd_s64:
12019 case NEON::BI__builtin_neon_vtstd_u64: {
12020 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12021 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12022 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12023 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12024 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12025 llvm::Constant::getNullValue(Int64Ty));
12026 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12027 }
12028 case NEON::BI__builtin_neon_vset_lane_i8:
12029 case NEON::BI__builtin_neon_vset_lane_i16:
12030 case NEON::BI__builtin_neon_vset_lane_i32:
12031 case NEON::BI__builtin_neon_vset_lane_i64:
12032 case NEON::BI__builtin_neon_vset_lane_bf16:
12033 case NEON::BI__builtin_neon_vset_lane_f32:
12034 case NEON::BI__builtin_neon_vsetq_lane_i8:
12035 case NEON::BI__builtin_neon_vsetq_lane_i16:
12036 case NEON::BI__builtin_neon_vsetq_lane_i32:
12037 case NEON::BI__builtin_neon_vsetq_lane_i64:
12038 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12039 case NEON::BI__builtin_neon_vsetq_lane_f32:
12040 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12041 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12042 case NEON::BI__builtin_neon_vset_lane_f64:
12043 // The vector type needs a cast for the v1f64 variant.
12044 Ops[1] =
12045 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12046 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12047 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12048 case NEON::BI__builtin_neon_vsetq_lane_f64:
12049 // The vector type needs a cast for the v2f64 variant.
12050 Ops[1] =
12051 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12052 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12053 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12054
12055 case NEON::BI__builtin_neon_vget_lane_i8:
12056 case NEON::BI__builtin_neon_vdupb_lane_i8:
12057 Ops[0] =
12058 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12059 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12060 "vget_lane");
12061 case NEON::BI__builtin_neon_vgetq_lane_i8:
12062 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12063 Ops[0] =
12064 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12065 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12066 "vgetq_lane");
12067 case NEON::BI__builtin_neon_vget_lane_i16:
12068 case NEON::BI__builtin_neon_vduph_lane_i16:
12069 Ops[0] =
12070 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12071 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12072 "vget_lane");
12073 case NEON::BI__builtin_neon_vgetq_lane_i16:
12074 case NEON::BI__builtin_neon_vduph_laneq_i16:
12075 Ops[0] =
12076 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12077 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12078 "vgetq_lane");
12079 case NEON::BI__builtin_neon_vget_lane_i32:
12080 case NEON::BI__builtin_neon_vdups_lane_i32:
12081 Ops[0] =
12082 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12083 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12084 "vget_lane");
12085 case NEON::BI__builtin_neon_vdups_lane_f32:
12086 Ops[0] =
12087 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12088 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12089 "vdups_lane");
12090 case NEON::BI__builtin_neon_vgetq_lane_i32:
12091 case NEON::BI__builtin_neon_vdups_laneq_i32:
12092 Ops[0] =
12093 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12094 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12095 "vgetq_lane");
12096 case NEON::BI__builtin_neon_vget_lane_i64:
12097 case NEON::BI__builtin_neon_vdupd_lane_i64:
12098 Ops[0] =
12099 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12100 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12101 "vget_lane");
12102 case NEON::BI__builtin_neon_vdupd_lane_f64:
12103 Ops[0] =
12104 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12105 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12106 "vdupd_lane");
12107 case NEON::BI__builtin_neon_vgetq_lane_i64:
12108 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12109 Ops[0] =
12110 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12111 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12112 "vgetq_lane");
12113 case NEON::BI__builtin_neon_vget_lane_f32:
12114 Ops[0] =
12115 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12116 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12117 "vget_lane");
12118 case NEON::BI__builtin_neon_vget_lane_f64:
12119 Ops[0] =
12120 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12121 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12122 "vget_lane");
12123 case NEON::BI__builtin_neon_vgetq_lane_f32:
12124 case NEON::BI__builtin_neon_vdups_laneq_f32:
12125 Ops[0] =
12126 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12127 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12128 "vgetq_lane");
12129 case NEON::BI__builtin_neon_vgetq_lane_f64:
12130 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12131 Ops[0] =
12132 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12133 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12134 "vgetq_lane");
12135 case NEON::BI__builtin_neon_vaddh_f16:
12136 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12137 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12138 case NEON::BI__builtin_neon_vsubh_f16:
12139 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12140 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12141 case NEON::BI__builtin_neon_vmulh_f16:
12142 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12143 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12144 case NEON::BI__builtin_neon_vdivh_f16:
12145 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12146 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12147 case NEON::BI__builtin_neon_vfmah_f16:
12148 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12149 return emitCallMaybeConstrainedFPBuiltin(
12150 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12151 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12152 case NEON::BI__builtin_neon_vfmsh_f16: {
12153 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12154
12155 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12156 return emitCallMaybeConstrainedFPBuiltin(
12157 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12158 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12159 }
12160 case NEON::BI__builtin_neon_vaddd_s64:
12161 case NEON::BI__builtin_neon_vaddd_u64:
12162 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12163 case NEON::BI__builtin_neon_vsubd_s64:
12164 case NEON::BI__builtin_neon_vsubd_u64:
12165 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12166 case NEON::BI__builtin_neon_vqdmlalh_s16:
12167 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12168 SmallVector<Value *, 2> ProductOps;
12169 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12170 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12171 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12172 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12173 ProductOps, "vqdmlXl");
12174 Constant *CI = ConstantInt::get(SizeTy, 0);
12175 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12176
12177 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12178 ? Intrinsic::aarch64_neon_sqadd
12179 : Intrinsic::aarch64_neon_sqsub;
12180 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12181 }
12182 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12183 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12184 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12185 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12186 Ops, "vqshlu_n");
12187 }
12188 case NEON::BI__builtin_neon_vqshld_n_u64:
12189 case NEON::BI__builtin_neon_vqshld_n_s64: {
12190 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12191 ? Intrinsic::aarch64_neon_uqshl
12192 : Intrinsic::aarch64_neon_sqshl;
12193 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12194 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12195 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12196 }
12197 case NEON::BI__builtin_neon_vrshrd_n_u64:
12198 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12199 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12200 ? Intrinsic::aarch64_neon_urshl
12201 : Intrinsic::aarch64_neon_srshl;
12202 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12203 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12204 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12205 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12206 }
12207 case NEON::BI__builtin_neon_vrsrad_n_u64:
12208 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12209 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12210 ? Intrinsic::aarch64_neon_urshl
12211 : Intrinsic::aarch64_neon_srshl;
12212 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12213 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12214 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12215 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12216 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12217 }
12218 case NEON::BI__builtin_neon_vshld_n_s64:
12219 case NEON::BI__builtin_neon_vshld_n_u64: {
12220 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12221 return Builder.CreateShl(
12222 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12223 }
12224 case NEON::BI__builtin_neon_vshrd_n_s64: {
12225 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12226 return Builder.CreateAShr(
12227 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12228 Amt->getZExtValue())),
12229 "shrd_n");
12230 }
12231 case NEON::BI__builtin_neon_vshrd_n_u64: {
12232 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12233 uint64_t ShiftAmt = Amt->getZExtValue();
12234 // Right-shifting an unsigned value by its size yields 0.
12235 if (ShiftAmt == 64)
12236 return ConstantInt::get(Int64Ty, 0);
12237 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12238 "shrd_n");
12239 }
12240 case NEON::BI__builtin_neon_vsrad_n_s64: {
12241 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12242 Ops[1] = Builder.CreateAShr(
12243 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12244 Amt->getZExtValue())),
12245 "shrd_n");
12246 return Builder.CreateAdd(Ops[0], Ops[1]);
12247 }
12248 case NEON::BI__builtin_neon_vsrad_n_u64: {
12249 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12250 uint64_t ShiftAmt = Amt->getZExtValue();
12251 // Right-shifting an unsigned value by its size yields 0.
12252 // As Op + 0 = Op, return Ops[0] directly.
12253 if (ShiftAmt == 64)
12254 return Ops[0];
12255 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12256 "shrd_n");
12257 return Builder.CreateAdd(Ops[0], Ops[1]);
12258 }
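// In the scalar shift cases above, arithmetic right shifts clamp the
// immediate to 63 (an i64 shift by 64 is poison in LLVM IR, and shifting by
// 63 already replicates the sign bit), while logical right shifts by 64 are
// folded to the well-defined result of 0.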
12259 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12260 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12261 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12262 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12263 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12264 "lane");
12265 SmallVector<Value *, 2> ProductOps;
12266 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12267 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12268 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12269 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12270 ProductOps, "vqdmlXl");
12271 Constant *CI = ConstantInt::get(SizeTy, 0);
12272 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12273 Ops.pop_back();
12274
12275 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12276 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12277 ? Intrinsic::aarch64_neon_sqadd
12278 : Intrinsic::aarch64_neon_sqsub;
12279 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12280 }
12281 case NEON::BI__builtin_neon_vqdmlals_s32:
12282 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12283 SmallVector<Value *, 2> ProductOps;
12284 ProductOps.push_back(Ops[1]);
12285 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12286 Ops[1] =
12287 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12288 ProductOps, "vqdmlXl");
12289
12290 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12291 ? Intrinsic::aarch64_neon_sqadd
12292 : Intrinsic::aarch64_neon_sqsub;
12293 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12294 }
12295 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12296 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12297 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12298 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12299 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12300 "lane");
12301 SmallVector<Value *, 2> ProductOps;
12302 ProductOps.push_back(Ops[1]);
12303 ProductOps.push_back(Ops[2]);
12304 Ops[1] =
12305 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12306 ProductOps, "vqdmlXl");
12307 Ops.pop_back();
12308
12309 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12310 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12311 ? Intrinsic::aarch64_neon_sqadd
12312 : Intrinsic::aarch64_neon_sqsub;
12313 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12314 }
12315 case NEON::BI__builtin_neon_vget_lane_bf16:
12316 case NEON::BI__builtin_neon_vduph_lane_bf16:
12317 case NEON::BI__builtin_neon_vduph_lane_f16: {
12318 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12319 "vget_lane");
12320 }
12321 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12322 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12323 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12324 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12325 "vgetq_lane");
12326 }
12327
12328 case clang::AArch64::BI_InterlockedAdd:
12329 case clang::AArch64::BI_InterlockedAdd64: {
12330 Address DestAddr = CheckAtomicAlignment(*this, E);
12331 Value *Val = EmitScalarExpr(E->getArg(1));
12332 AtomicRMWInst *RMWI =
12333 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12334 llvm::AtomicOrdering::SequentiallyConsistent);
12335 return Builder.CreateAdd(RMWI, Val);
12336 }
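// AtomicRMW add returns the value memory held before the operation, so adding
// Val again produces the post-increment result that _InterlockedAdd returns.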
12337 }
12338
12339 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12340 llvm::Type *Ty = VTy;
12341 if (!Ty)
12342 return nullptr;
12343
12344 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12345 // defer to common code if it's been added to our special map.
12346 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12347 AArch64SIMDIntrinsicsProvenSorted);
12348
12349 if (Builtin)
12350 return EmitCommonNeonBuiltinExpr(
12351 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12352 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12353 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12354
12355 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12356 return V;
12357
12358 unsigned Int;
12359 switch (BuiltinID) {
12360 default: return nullptr;
12361 case NEON::BI__builtin_neon_vbsl_v:
12362 case NEON::BI__builtin_neon_vbslq_v: {
12363 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12364 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12365 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12366 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12367
12368 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12369 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12370 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12371 return Builder.CreateBitCast(Ops[0], Ty);
12372 }
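// vbsl has no dedicated intrinsic here: the bitwise select is expanded as
// (mask & b) | (~mask & c) on the integer form of the vector type and the
// result is bitcast back to the original element type.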
12373 case NEON::BI__builtin_neon_vfma_lane_v:
12374 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12375 // The ARM builtins (and instructions) have the addend as the first
12376 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12377 Value *Addend = Ops[0];
12378 Value *Multiplicand = Ops[1];
12379 Value *LaneSource = Ops[2];
12380 Ops[0] = Multiplicand;
12381 Ops[1] = LaneSource;
12382 Ops[2] = Addend;
12383
12384 // Now adjust things to handle the lane access.
12385 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12386 ? llvm::FixedVectorType::get(VTy->getElementType(),
12387 VTy->getNumElements() / 2)
12388 : VTy;
12389 llvm::Constant *cst = cast<Constant>(Ops[3]);
12390 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12391 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12392 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12393
12394 Ops.pop_back();
12395 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12396 : Intrinsic::fma;
12397 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12398 }
12399 case NEON::BI__builtin_neon_vfma_laneq_v: {
12400 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12401 // v1f64 fma should be mapped to Neon scalar f64 fma
12402 if (VTy && VTy->getElementType() == DoubleTy) {
12403 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12404 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12405       llvm::FixedVectorType *VTy =
12406           GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12407       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12408 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12409       Value *Result;
12410       Result = emitCallMaybeConstrainedFPBuiltin(
12411           *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12412 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12413 return Builder.CreateBitCast(Result, Ty);
12414 }
12415 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12416 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12417
12418 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12419 VTy->getNumElements() * 2);
12420 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12421 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12422 cast<ConstantInt>(Ops[3]));
12423 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12424
12425     return emitCallMaybeConstrainedFPBuiltin(
12426         *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12427 {Ops[2], Ops[1], Ops[0]});
12428 }
12429 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12430 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12431 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12432
12433 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12434     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12435     return emitCallMaybeConstrainedFPBuiltin(
12436         *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12437 {Ops[2], Ops[1], Ops[0]});
12438 }
12439 case NEON::BI__builtin_neon_vfmah_lane_f16:
12440 case NEON::BI__builtin_neon_vfmas_lane_f32:
12441 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12442 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12443 case NEON::BI__builtin_neon_vfmad_lane_f64:
12444 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12445 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12446 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12447     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12448     return emitCallMaybeConstrainedFPBuiltin(
12449         *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12450 {Ops[1], Ops[2], Ops[0]});
12451 }
12452 case NEON::BI__builtin_neon_vmull_v:
12453 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12454 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12455 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12456 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12457 case NEON::BI__builtin_neon_vmax_v:
12458 case NEON::BI__builtin_neon_vmaxq_v:
12459 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12460 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12461 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12462 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12463 case NEON::BI__builtin_neon_vmaxh_f16: {
12464 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12465 Int = Intrinsic::aarch64_neon_fmax;
12466 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12467 }
12468 case NEON::BI__builtin_neon_vmin_v:
12469 case NEON::BI__builtin_neon_vminq_v:
12470 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12471 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12472 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12473 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12474 case NEON::BI__builtin_neon_vminh_f16: {
12475 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12476 Int = Intrinsic::aarch64_neon_fmin;
12477 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12478 }
12479 case NEON::BI__builtin_neon_vabd_v:
12480 case NEON::BI__builtin_neon_vabdq_v:
12481 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12482 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12483 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12484 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12485 case NEON::BI__builtin_neon_vpadal_v:
12486 case NEON::BI__builtin_neon_vpadalq_v: {
12487 unsigned ArgElts = VTy->getNumElements();
12488 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12489 unsigned BitWidth = EltTy->getBitWidth();
12490 auto *ArgTy = llvm::FixedVectorType::get(
12491 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12492 llvm::Type* Tys[2] = { VTy, ArgTy };
12493     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12494     SmallVector<llvm::Value*, 1> TmpOps;
12495     TmpOps.push_back(Ops[1]);
12496 Function *F = CGM.getIntrinsic(Int, Tys);
12497 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12498 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12499 return Builder.CreateAdd(tmp, addend);
12500 }
12501 case NEON::BI__builtin_neon_vpmin_v:
12502 case NEON::BI__builtin_neon_vpminq_v:
12503 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12504 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12505 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12506 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12507 case NEON::BI__builtin_neon_vpmax_v:
12508 case NEON::BI__builtin_neon_vpmaxq_v:
12509 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12510 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12511 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12512 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12513 case NEON::BI__builtin_neon_vminnm_v:
12514 case NEON::BI__builtin_neon_vminnmq_v:
12515 Int = Intrinsic::aarch64_neon_fminnm;
12516 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12517 case NEON::BI__builtin_neon_vminnmh_f16:
12518 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12519 Int = Intrinsic::aarch64_neon_fminnm;
12520 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12521 case NEON::BI__builtin_neon_vmaxnm_v:
12522 case NEON::BI__builtin_neon_vmaxnmq_v:
12523 Int = Intrinsic::aarch64_neon_fmaxnm;
12524 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12525 case NEON::BI__builtin_neon_vmaxnmh_f16:
12526 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12527 Int = Intrinsic::aarch64_neon_fmaxnm;
12528 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12529 case NEON::BI__builtin_neon_vrecpss_f32: {
12530 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12531 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12532 Ops, "vrecps");
12533 }
12534 case NEON::BI__builtin_neon_vrecpsd_f64:
12535 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12537 Ops, "vrecps");
12538 case NEON::BI__builtin_neon_vrecpsh_f16:
12539 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12541 Ops, "vrecps");
12542 case NEON::BI__builtin_neon_vqshrun_n_v:
12543 Int = Intrinsic::aarch64_neon_sqshrun;
12544 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12545 case NEON::BI__builtin_neon_vqrshrun_n_v:
12546 Int = Intrinsic::aarch64_neon_sqrshrun;
12547 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12548 case NEON::BI__builtin_neon_vqshrn_n_v:
12549 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12550 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12551 case NEON::BI__builtin_neon_vrshrn_n_v:
12552 Int = Intrinsic::aarch64_neon_rshrn;
12553 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12554 case NEON::BI__builtin_neon_vqrshrn_n_v:
12555 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12556 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12557 case NEON::BI__builtin_neon_vrndah_f16: {
12558 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12559 Int = Builder.getIsFPConstrained()
12560 ? Intrinsic::experimental_constrained_round
12561 : Intrinsic::round;
12562 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12563 }
12564 case NEON::BI__builtin_neon_vrnda_v:
12565 case NEON::BI__builtin_neon_vrndaq_v: {
12566 Int = Builder.getIsFPConstrained()
12567 ? Intrinsic::experimental_constrained_round
12568 : Intrinsic::round;
12569 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12570 }
12571 case NEON::BI__builtin_neon_vrndih_f16: {
12572 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12573 Int = Builder.getIsFPConstrained()
12574 ? Intrinsic::experimental_constrained_nearbyint
12575 : Intrinsic::nearbyint;
12576 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12577 }
12578 case NEON::BI__builtin_neon_vrndmh_f16: {
12579 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12580 Int = Builder.getIsFPConstrained()
12581 ? Intrinsic::experimental_constrained_floor
12582 : Intrinsic::floor;
12583 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12584 }
12585 case NEON::BI__builtin_neon_vrndm_v:
12586 case NEON::BI__builtin_neon_vrndmq_v: {
12587 Int = Builder.getIsFPConstrained()
12588 ? Intrinsic::experimental_constrained_floor
12589 : Intrinsic::floor;
12590 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12591 }
12592 case NEON::BI__builtin_neon_vrndnh_f16: {
12593 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12594 Int = Builder.getIsFPConstrained()
12595 ? Intrinsic::experimental_constrained_roundeven
12596 : Intrinsic::roundeven;
12597 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12598 }
12599 case NEON::BI__builtin_neon_vrndn_v:
12600 case NEON::BI__builtin_neon_vrndnq_v: {
12601 Int = Builder.getIsFPConstrained()
12602 ? Intrinsic::experimental_constrained_roundeven
12603 : Intrinsic::roundeven;
12604 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12605 }
12606 case NEON::BI__builtin_neon_vrndns_f32: {
12607 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12608 Int = Builder.getIsFPConstrained()
12609 ? Intrinsic::experimental_constrained_roundeven
12610 : Intrinsic::roundeven;
12611 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12612 }
12613 case NEON::BI__builtin_neon_vrndph_f16: {
12614 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12615 Int = Builder.getIsFPConstrained()
12616 ? Intrinsic::experimental_constrained_ceil
12617 : Intrinsic::ceil;
12618 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12619 }
12620 case NEON::BI__builtin_neon_vrndp_v:
12621 case NEON::BI__builtin_neon_vrndpq_v: {
12622 Int = Builder.getIsFPConstrained()
12623 ? Intrinsic::experimental_constrained_ceil
12624 : Intrinsic::ceil;
12625 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12626 }
12627 case NEON::BI__builtin_neon_vrndxh_f16: {
12628 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12629 Int = Builder.getIsFPConstrained()
12630 ? Intrinsic::experimental_constrained_rint
12631 : Intrinsic::rint;
12632 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12633 }
12634 case NEON::BI__builtin_neon_vrndx_v:
12635 case NEON::BI__builtin_neon_vrndxq_v: {
12636 Int = Builder.getIsFPConstrained()
12637 ? Intrinsic::experimental_constrained_rint
12638 : Intrinsic::rint;
12639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12640 }
12641 case NEON::BI__builtin_neon_vrndh_f16: {
12642 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12643 Int = Builder.getIsFPConstrained()
12644 ? Intrinsic::experimental_constrained_trunc
12645 : Intrinsic::trunc;
12646 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12647 }
12648 case NEON::BI__builtin_neon_vrnd32x_f32:
12649 case NEON::BI__builtin_neon_vrnd32xq_f32:
12650 case NEON::BI__builtin_neon_vrnd32x_f64:
12651 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12652 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12653 Int = Intrinsic::aarch64_neon_frint32x;
12654 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12655 }
12656 case NEON::BI__builtin_neon_vrnd32z_f32:
12657 case NEON::BI__builtin_neon_vrnd32zq_f32:
12658 case NEON::BI__builtin_neon_vrnd32z_f64:
12659 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12660 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12661 Int = Intrinsic::aarch64_neon_frint32z;
12662 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12663 }
12664 case NEON::BI__builtin_neon_vrnd64x_f32:
12665 case NEON::BI__builtin_neon_vrnd64xq_f32:
12666 case NEON::BI__builtin_neon_vrnd64x_f64:
12667 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12668 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12669 Int = Intrinsic::aarch64_neon_frint64x;
12670 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12671 }
12672 case NEON::BI__builtin_neon_vrnd64z_f32:
12673 case NEON::BI__builtin_neon_vrnd64zq_f32:
12674 case NEON::BI__builtin_neon_vrnd64z_f64:
12675 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12676 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12677 Int = Intrinsic::aarch64_neon_frint64z;
12678 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12679 }
12680 case NEON::BI__builtin_neon_vrnd_v:
12681 case NEON::BI__builtin_neon_vrndq_v: {
12682 Int = Builder.getIsFPConstrained()
12683 ? Intrinsic::experimental_constrained_trunc
12684 : Intrinsic::trunc;
12685 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12686 }
12687 case NEON::BI__builtin_neon_vcvt_f64_v:
12688 case NEON::BI__builtin_neon_vcvtq_f64_v:
12689 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12690 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12691 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12692 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12693 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12694 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12695 "unexpected vcvt_f64_f32 builtin");
12696 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12697 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12698
12699 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12700 }
12701 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12702 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12703 "unexpected vcvt_f32_f64 builtin");
12704 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12705 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12706
12707 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12708 }
12709 case NEON::BI__builtin_neon_vcvt_s32_v:
12710 case NEON::BI__builtin_neon_vcvt_u32_v:
12711 case NEON::BI__builtin_neon_vcvt_s64_v:
12712 case NEON::BI__builtin_neon_vcvt_u64_v:
12713 case NEON::BI__builtin_neon_vcvt_s16_f16:
12714 case NEON::BI__builtin_neon_vcvt_u16_f16:
12715 case NEON::BI__builtin_neon_vcvtq_s32_v:
12716 case NEON::BI__builtin_neon_vcvtq_u32_v:
12717 case NEON::BI__builtin_neon_vcvtq_s64_v:
12718 case NEON::BI__builtin_neon_vcvtq_u64_v:
12719 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12720 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12721 Int =
12722 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12723 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12724 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12725 }
12726 case NEON::BI__builtin_neon_vcvta_s16_f16:
12727 case NEON::BI__builtin_neon_vcvta_u16_f16:
12728 case NEON::BI__builtin_neon_vcvta_s32_v:
12729 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12730 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12731 case NEON::BI__builtin_neon_vcvta_u32_v:
12732 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12733 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12734 case NEON::BI__builtin_neon_vcvta_s64_v:
12735 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12736 case NEON::BI__builtin_neon_vcvta_u64_v:
12737 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12738 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12739 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12740 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12741 }
12742 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12743 case NEON::BI__builtin_neon_vcvtm_s32_v:
12744 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12745 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12746 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12747 case NEON::BI__builtin_neon_vcvtm_u32_v:
12748 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12749 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12750 case NEON::BI__builtin_neon_vcvtm_s64_v:
12751 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12752 case NEON::BI__builtin_neon_vcvtm_u64_v:
12753 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12754 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12755 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12756 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12757 }
12758 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12759 case NEON::BI__builtin_neon_vcvtn_s32_v:
12760 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12761 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12762 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12763 case NEON::BI__builtin_neon_vcvtn_u32_v:
12764 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12765 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12766 case NEON::BI__builtin_neon_vcvtn_s64_v:
12767 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12768 case NEON::BI__builtin_neon_vcvtn_u64_v:
12769 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12770 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12771 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12772 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12773 }
12774 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12775 case NEON::BI__builtin_neon_vcvtp_s32_v:
12776 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12777 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12778 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12779 case NEON::BI__builtin_neon_vcvtp_u32_v:
12780 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12781 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12782 case NEON::BI__builtin_neon_vcvtp_s64_v:
12783 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12784 case NEON::BI__builtin_neon_vcvtp_u64_v:
12785 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12786 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12787 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12788 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12789 }
12790 case NEON::BI__builtin_neon_vmulx_v:
12791 case NEON::BI__builtin_neon_vmulxq_v: {
12792 Int = Intrinsic::aarch64_neon_fmulx;
12793 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12794 }
12795 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12796 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12797 // vmulx_lane should be mapped to Neon scalar mulx after
12798 // extracting the scalar element
12799 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12800 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12801 Ops.pop_back();
12802 Int = Intrinsic::aarch64_neon_fmulx;
12803 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12804 }
12805 case NEON::BI__builtin_neon_vmul_lane_v:
12806 case NEON::BI__builtin_neon_vmul_laneq_v: {
12807 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12808 bool Quad = false;
12809 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12810 Quad = true;
12811 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12812     llvm::FixedVectorType *VTy =
12813         GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12814     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12815 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12816 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12817 return Builder.CreateBitCast(Result, Ty);
12818 }
12819 case NEON::BI__builtin_neon_vnegd_s64:
12820 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12821 case NEON::BI__builtin_neon_vnegh_f16:
12822 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12823 case NEON::BI__builtin_neon_vpmaxnm_v:
12824 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12825 Int = Intrinsic::aarch64_neon_fmaxnmp;
12826 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12827 }
12828 case NEON::BI__builtin_neon_vpminnm_v:
12829 case NEON::BI__builtin_neon_vpminnmq_v: {
12830 Int = Intrinsic::aarch64_neon_fminnmp;
12831 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12832 }
12833 case NEON::BI__builtin_neon_vsqrth_f16: {
12834 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12835 Int = Builder.getIsFPConstrained()
12836 ? Intrinsic::experimental_constrained_sqrt
12837 : Intrinsic::sqrt;
12838 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12839 }
12840 case NEON::BI__builtin_neon_vsqrt_v:
12841 case NEON::BI__builtin_neon_vsqrtq_v: {
12842 Int = Builder.getIsFPConstrained()
12843 ? Intrinsic::experimental_constrained_sqrt
12844 : Intrinsic::sqrt;
12845 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12846 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12847 }
12848 case NEON::BI__builtin_neon_vrbit_v:
12849 case NEON::BI__builtin_neon_vrbitq_v: {
12850 Int = Intrinsic::bitreverse;
12851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12852 }
12853 case NEON::BI__builtin_neon_vaddv_u8:
12854 // FIXME: These are handled by the AArch64 scalar code.
12855 usgn = true;
12856 [[fallthrough]];
12857 case NEON::BI__builtin_neon_vaddv_s8: {
12858 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12859 Ty = Int32Ty;
12860 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12861 llvm::Type *Tys[2] = { Ty, VTy };
12862 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12863 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12864 return Builder.CreateTrunc(Ops[0], Int8Ty);
12865 }
12866 case NEON::BI__builtin_neon_vaddv_u16:
12867 usgn = true;
12868 [[fallthrough]];
12869 case NEON::BI__builtin_neon_vaddv_s16: {
12870 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12871 Ty = Int32Ty;
12872 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12873 llvm::Type *Tys[2] = { Ty, VTy };
12874 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12875 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12876 return Builder.CreateTrunc(Ops[0], Int16Ty);
12877 }
12878 case NEON::BI__builtin_neon_vaddvq_u8:
12879 usgn = true;
12880 [[fallthrough]];
12881 case NEON::BI__builtin_neon_vaddvq_s8: {
12882 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12883 Ty = Int32Ty;
12884 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12885 llvm::Type *Tys[2] = { Ty, VTy };
12886 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12887 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12888 return Builder.CreateTrunc(Ops[0], Int8Ty);
12889 }
12890 case NEON::BI__builtin_neon_vaddvq_u16:
12891 usgn = true;
12892 [[fallthrough]];
12893 case NEON::BI__builtin_neon_vaddvq_s16: {
12894 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12895 Ty = Int32Ty;
12896 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12897 llvm::Type *Tys[2] = { Ty, VTy };
12898 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12899 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12900 return Builder.CreateTrunc(Ops[0], Int16Ty);
12901 }
12902 case NEON::BI__builtin_neon_vmaxv_u8: {
12903 Int = Intrinsic::aarch64_neon_umaxv;
12904 Ty = Int32Ty;
12905 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12906 llvm::Type *Tys[2] = { Ty, VTy };
12907 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12908 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12909 return Builder.CreateTrunc(Ops[0], Int8Ty);
12910 }
12911 case NEON::BI__builtin_neon_vmaxv_u16: {
12912 Int = Intrinsic::aarch64_neon_umaxv;
12913 Ty = Int32Ty;
12914 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12915 llvm::Type *Tys[2] = { Ty, VTy };
12916 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12917 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12918 return Builder.CreateTrunc(Ops[0], Int16Ty);
12919 }
12920 case NEON::BI__builtin_neon_vmaxvq_u8: {
12921 Int = Intrinsic::aarch64_neon_umaxv;
12922 Ty = Int32Ty;
12923 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12924 llvm::Type *Tys[2] = { Ty, VTy };
12925 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12926 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12927 return Builder.CreateTrunc(Ops[0], Int8Ty);
12928 }
12929 case NEON::BI__builtin_neon_vmaxvq_u16: {
12930 Int = Intrinsic::aarch64_neon_umaxv;
12931 Ty = Int32Ty;
12932 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12933 llvm::Type *Tys[2] = { Ty, VTy };
12934 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12935 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12936 return Builder.CreateTrunc(Ops[0], Int16Ty);
12937 }
12938 case NEON::BI__builtin_neon_vmaxv_s8: {
12939 Int = Intrinsic::aarch64_neon_smaxv;
12940 Ty = Int32Ty;
12941 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12942 llvm::Type *Tys[2] = { Ty, VTy };
12943 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12944 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12945 return Builder.CreateTrunc(Ops[0], Int8Ty);
12946 }
12947 case NEON::BI__builtin_neon_vmaxv_s16: {
12948 Int = Intrinsic::aarch64_neon_smaxv;
12949 Ty = Int32Ty;
12950 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12951 llvm::Type *Tys[2] = { Ty, VTy };
12952 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12953 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12954 return Builder.CreateTrunc(Ops[0], Int16Ty);
12955 }
12956 case NEON::BI__builtin_neon_vmaxvq_s8: {
12957 Int = Intrinsic::aarch64_neon_smaxv;
12958 Ty = Int32Ty;
12959 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12960 llvm::Type *Tys[2] = { Ty, VTy };
12961 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12962 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12963 return Builder.CreateTrunc(Ops[0], Int8Ty);
12964 }
12965 case NEON::BI__builtin_neon_vmaxvq_s16: {
12966 Int = Intrinsic::aarch64_neon_smaxv;
12967 Ty = Int32Ty;
12968 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12969 llvm::Type *Tys[2] = { Ty, VTy };
12970 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12971 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12972 return Builder.CreateTrunc(Ops[0], Int16Ty);
12973 }
12974 case NEON::BI__builtin_neon_vmaxv_f16: {
12975 Int = Intrinsic::aarch64_neon_fmaxv;
12976 Ty = HalfTy;
12977 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12978 llvm::Type *Tys[2] = { Ty, VTy };
12979 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12980 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12981 return Builder.CreateTrunc(Ops[0], HalfTy);
12982 }
12983 case NEON::BI__builtin_neon_vmaxvq_f16: {
12984 Int = Intrinsic::aarch64_neon_fmaxv;
12985 Ty = HalfTy;
12986 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12987 llvm::Type *Tys[2] = { Ty, VTy };
12988 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12989 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12990 return Builder.CreateTrunc(Ops[0], HalfTy);
12991 }
12992 case NEON::BI__builtin_neon_vminv_u8: {
12993 Int = Intrinsic::aarch64_neon_uminv;
12994 Ty = Int32Ty;
12995 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12996 llvm::Type *Tys[2] = { Ty, VTy };
12997 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12998 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12999 return Builder.CreateTrunc(Ops[0], Int8Ty);
13000 }
13001 case NEON::BI__builtin_neon_vminv_u16: {
13002 Int = Intrinsic::aarch64_neon_uminv;
13003 Ty = Int32Ty;
13004 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13005 llvm::Type *Tys[2] = { Ty, VTy };
13006 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13007 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13008 return Builder.CreateTrunc(Ops[0], Int16Ty);
13009 }
13010 case NEON::BI__builtin_neon_vminvq_u8: {
13011 Int = Intrinsic::aarch64_neon_uminv;
13012 Ty = Int32Ty;
13013 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13014 llvm::Type *Tys[2] = { Ty, VTy };
13015 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13016 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13017 return Builder.CreateTrunc(Ops[0], Int8Ty);
13018 }
13019 case NEON::BI__builtin_neon_vminvq_u16: {
13020 Int = Intrinsic::aarch64_neon_uminv;
13021 Ty = Int32Ty;
13022 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13023 llvm::Type *Tys[2] = { Ty, VTy };
13024 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13025 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13026 return Builder.CreateTrunc(Ops[0], Int16Ty);
13027 }
13028 case NEON::BI__builtin_neon_vminv_s8: {
13029 Int = Intrinsic::aarch64_neon_sminv;
13030 Ty = Int32Ty;
13031 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13032 llvm::Type *Tys[2] = { Ty, VTy };
13033 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13034 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13035 return Builder.CreateTrunc(Ops[0], Int8Ty);
13036 }
13037 case NEON::BI__builtin_neon_vminv_s16: {
13038 Int = Intrinsic::aarch64_neon_sminv;
13039 Ty = Int32Ty;
13040 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13041 llvm::Type *Tys[2] = { Ty, VTy };
13042 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13043 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13044 return Builder.CreateTrunc(Ops[0], Int16Ty);
13045 }
13046 case NEON::BI__builtin_neon_vminvq_s8: {
13047 Int = Intrinsic::aarch64_neon_sminv;
13048 Ty = Int32Ty;
13049 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13050 llvm::Type *Tys[2] = { Ty, VTy };
13051 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13052 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13053 return Builder.CreateTrunc(Ops[0], Int8Ty);
13054 }
13055 case NEON::BI__builtin_neon_vminvq_s16: {
13056 Int = Intrinsic::aarch64_neon_sminv;
13057 Ty = Int32Ty;
13058 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13059 llvm::Type *Tys[2] = { Ty, VTy };
13060 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13061 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13062 return Builder.CreateTrunc(Ops[0], Int16Ty);
13063 }
13064 case NEON::BI__builtin_neon_vminv_f16: {
13065 Int = Intrinsic::aarch64_neon_fminv;
13066 Ty = HalfTy;
13067 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13068 llvm::Type *Tys[2] = { Ty, VTy };
13069 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13070 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13071 return Builder.CreateTrunc(Ops[0], HalfTy);
13072 }
13073 case NEON::BI__builtin_neon_vminvq_f16: {
13074 Int = Intrinsic::aarch64_neon_fminv;
13075 Ty = HalfTy;
13076 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13077 llvm::Type *Tys[2] = { Ty, VTy };
13078 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13079 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13080 return Builder.CreateTrunc(Ops[0], HalfTy);
13081 }
13082 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13083 Int = Intrinsic::aarch64_neon_fmaxnmv;
13084 Ty = HalfTy;
13085 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13086 llvm::Type *Tys[2] = { Ty, VTy };
13087 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13088 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13089 return Builder.CreateTrunc(Ops[0], HalfTy);
13090 }
13091 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13092 Int = Intrinsic::aarch64_neon_fmaxnmv;
13093 Ty = HalfTy;
13094 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13095 llvm::Type *Tys[2] = { Ty, VTy };
13096 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13097 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13098 return Builder.CreateTrunc(Ops[0], HalfTy);
13099 }
13100 case NEON::BI__builtin_neon_vminnmv_f16: {
13101 Int = Intrinsic::aarch64_neon_fminnmv;
13102 Ty = HalfTy;
13103 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13104 llvm::Type *Tys[2] = { Ty, VTy };
13105 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13106 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13107 return Builder.CreateTrunc(Ops[0], HalfTy);
13108 }
13109 case NEON::BI__builtin_neon_vminnmvq_f16: {
13110 Int = Intrinsic::aarch64_neon_fminnmv;
13111 Ty = HalfTy;
13112 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13113 llvm::Type *Tys[2] = { Ty, VTy };
13114 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13115 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13116 return Builder.CreateTrunc(Ops[0], HalfTy);
13117 }
13118 case NEON::BI__builtin_neon_vmul_n_f64: {
13119 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13120 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13121 return Builder.CreateFMul(Ops[0], RHS);
13122 }
13123 case NEON::BI__builtin_neon_vaddlv_u8: {
13124 Int = Intrinsic::aarch64_neon_uaddlv;
13125 Ty = Int32Ty;
13126 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13127 llvm::Type *Tys[2] = { Ty, VTy };
13128 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13129 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13130 return Builder.CreateTrunc(Ops[0], Int16Ty);
13131 }
13132 case NEON::BI__builtin_neon_vaddlv_u16: {
13133 Int = Intrinsic::aarch64_neon_uaddlv;
13134 Ty = Int32Ty;
13135 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13136 llvm::Type *Tys[2] = { Ty, VTy };
13137 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13138 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13139 }
13140 case NEON::BI__builtin_neon_vaddlvq_u8: {
13141 Int = Intrinsic::aarch64_neon_uaddlv;
13142 Ty = Int32Ty;
13143 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13144 llvm::Type *Tys[2] = { Ty, VTy };
13145 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13146 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13147 return Builder.CreateTrunc(Ops[0], Int16Ty);
13148 }
13149 case NEON::BI__builtin_neon_vaddlvq_u16: {
13150 Int = Intrinsic::aarch64_neon_uaddlv;
13151 Ty = Int32Ty;
13152 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13153 llvm::Type *Tys[2] = { Ty, VTy };
13154 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13155 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13156 }
13157 case NEON::BI__builtin_neon_vaddlv_s8: {
13158 Int = Intrinsic::aarch64_neon_saddlv;
13159 Ty = Int32Ty;
13160 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13161 llvm::Type *Tys[2] = { Ty, VTy };
13162 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13163 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13164 return Builder.CreateTrunc(Ops[0], Int16Ty);
13165 }
13166 case NEON::BI__builtin_neon_vaddlv_s16: {
13167 Int = Intrinsic::aarch64_neon_saddlv;
13168 Ty = Int32Ty;
13169 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13170 llvm::Type *Tys[2] = { Ty, VTy };
13171 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13172 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13173 }
13174 case NEON::BI__builtin_neon_vaddlvq_s8: {
13175 Int = Intrinsic::aarch64_neon_saddlv;
13176 Ty = Int32Ty;
13177 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13178 llvm::Type *Tys[2] = { Ty, VTy };
13179 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13180 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13181 return Builder.CreateTrunc(Ops[0], Int16Ty);
13182 }
13183 case NEON::BI__builtin_neon_vaddlvq_s16: {
13184 Int = Intrinsic::aarch64_neon_saddlv;
13185 Ty = Int32Ty;
13186 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13187 llvm::Type *Tys[2] = { Ty, VTy };
13188 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13189 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13190 }
13191 case NEON::BI__builtin_neon_vsri_n_v:
13192 case NEON::BI__builtin_neon_vsriq_n_v: {
13193 Int = Intrinsic::aarch64_neon_vsri;
13194 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13195 return EmitNeonCall(Intrin, Ops, "vsri_n");
13196 }
13197 case NEON::BI__builtin_neon_vsli_n_v:
13198 case NEON::BI__builtin_neon_vsliq_n_v: {
13199 Int = Intrinsic::aarch64_neon_vsli;
13200 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13201 return EmitNeonCall(Intrin, Ops, "vsli_n");
13202 }
13203 case NEON::BI__builtin_neon_vsra_n_v:
13204 case NEON::BI__builtin_neon_vsraq_n_v:
13205 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13206 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13207 return Builder.CreateAdd(Ops[0], Ops[1]);
13208 case NEON::BI__builtin_neon_vrsra_n_v:
13209 case NEON::BI__builtin_neon_vrsraq_n_v: {
13210     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13211     SmallVector<llvm::Value*, 2> TmpOps;
13212     TmpOps.push_back(Ops[1]);
13213 TmpOps.push_back(Ops[2]);
13214 Function* F = CGM.getIntrinsic(Int, Ty);
13215 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13216 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13217 return Builder.CreateAdd(Ops[0], tmp);
13218 }
13219 case NEON::BI__builtin_neon_vld1_v:
13220 case NEON::BI__builtin_neon_vld1q_v: {
13221 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13222 }
13223 case NEON::BI__builtin_neon_vst1_v:
13224 case NEON::BI__builtin_neon_vst1q_v:
13225 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13226 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13227 case NEON::BI__builtin_neon_vld1_lane_v:
13228 case NEON::BI__builtin_neon_vld1q_lane_v: {
13229 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13230 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13231 PtrOp0.getAlignment());
13232 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13233 }
13234 case NEON::BI__builtin_neon_vldap1_lane_s64:
13235 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13236 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13237 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13238 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13239 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13240 Ops[0] = LI;
13241 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13242 }
13243 case NEON::BI__builtin_neon_vld1_dup_v:
13244 case NEON::BI__builtin_neon_vld1q_dup_v: {
13245 Value *V = PoisonValue::get(Ty);
13246 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13247 PtrOp0.getAlignment());
13248 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13249 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13250 return EmitNeonSplat(Ops[0], CI);
13251 }
13252 case NEON::BI__builtin_neon_vst1_lane_v:
13253 case NEON::BI__builtin_neon_vst1q_lane_v:
13254 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13255 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13256 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13257 case NEON::BI__builtin_neon_vstl1_lane_s64:
13258 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13259 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13260 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13261 llvm::StoreInst *SI =
13262 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13263 SI->setAtomic(llvm::AtomicOrdering::Release);
13264 return SI;
13265 }
13266 case NEON::BI__builtin_neon_vld2_v:
13267 case NEON::BI__builtin_neon_vld2q_v: {
13268 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13269 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13270 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13271 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13272 }
13273 case NEON::BI__builtin_neon_vld3_v:
13274 case NEON::BI__builtin_neon_vld3q_v: {
13275 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13276 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13277 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13278 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13279 }
13280 case NEON::BI__builtin_neon_vld4_v:
13281 case NEON::BI__builtin_neon_vld4q_v: {
13282 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13283 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13284 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13285 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13286 }
13287 case NEON::BI__builtin_neon_vld2_dup_v:
13288 case NEON::BI__builtin_neon_vld2q_dup_v: {
13289 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13290 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13291 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13292 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13293 }
13294 case NEON::BI__builtin_neon_vld3_dup_v:
13295 case NEON::BI__builtin_neon_vld3q_dup_v: {
13296 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13297 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13298 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13299 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13300 }
13301 case NEON::BI__builtin_neon_vld4_dup_v:
13302 case NEON::BI__builtin_neon_vld4q_dup_v: {
13303 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13304 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13305 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13306 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13307 }
13308 case NEON::BI__builtin_neon_vld2_lane_v:
13309 case NEON::BI__builtin_neon_vld2q_lane_v: {
13310 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13311 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
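    // The ld2lane intrinsic takes (v0, v1, lane, ptr); rotate the source
    // pointer (currently Ops[1]) to the end so the sliced operand list below
    // matches, while Ops[0] remains the destination the result is stored to.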
13312 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13313 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13314 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13315 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13316 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13317 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13318 }
13319 case NEON::BI__builtin_neon_vld3_lane_v:
13320 case NEON::BI__builtin_neon_vld3q_lane_v: {
13321 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13322 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13323 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13324 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13325 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13326 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13327 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13328 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13329 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13330 }
13331 case NEON::BI__builtin_neon_vld4_lane_v:
13332 case NEON::BI__builtin_neon_vld4q_lane_v: {
13333 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13334 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13335 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13336 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13337 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13338 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13339 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13340 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13341 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13342 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13343 }
13344 case NEON::BI__builtin_neon_vst2_v:
13345 case NEON::BI__builtin_neon_vst2q_v: {
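    // The st2 intrinsic takes (v0, v1, ptr); rotate the pointer operand from
    // the front of Ops to the back to match that order.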
13346 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13347 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13348 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13349 Ops, "");
13350 }
13351 case NEON::BI__builtin_neon_vst2_lane_v:
13352 case NEON::BI__builtin_neon_vst2q_lane_v: {
13353 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13354 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13355 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13356 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13357 Ops, "");
13358 }
13359 case NEON::BI__builtin_neon_vst3_v:
13360 case NEON::BI__builtin_neon_vst3q_v: {
13361 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13362 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13363 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13364 Ops, "");
13365 }
13366 case NEON::BI__builtin_neon_vst3_lane_v:
13367 case NEON::BI__builtin_neon_vst3q_lane_v: {
13368 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13369 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13370 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13371 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13372 Ops, "");
13373 }
13374 case NEON::BI__builtin_neon_vst4_v:
13375 case NEON::BI__builtin_neon_vst4q_v: {
13376 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13377 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13378 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13379 Ops, "");
13380 }
13381 case NEON::BI__builtin_neon_vst4_lane_v:
13382 case NEON::BI__builtin_neon_vst4q_lane_v: {
13383 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13384 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13385 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13386 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13387 Ops, "");
13388 }
13389 case NEON::BI__builtin_neon_vtrn_v:
13390 case NEON::BI__builtin_neon_vtrnq_v: {
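    // vtrn returns a pair of vectors: iteration vi builds the TRN1 (vi == 0)
    // or TRN2 (vi == 1) shuffle and stores it into the vi-th half of the
    // destination pointed to by Ops[0].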
13391 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13392 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13393 Value *SV = nullptr;
13394
13395 for (unsigned vi = 0; vi != 2; ++vi) {
13396 SmallVector<int, 16> Indices;
13397 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13398 Indices.push_back(i+vi);
13399 Indices.push_back(i+e+vi);
13400 }
13401 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13402 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13403 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13404 }
13405 return SV;
13406 }
13407 case NEON::BI__builtin_neon_vuzp_v:
13408 case NEON::BI__builtin_neon_vuzpq_v: {
13409 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13410 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13411 Value *SV = nullptr;
13412
13413 for (unsigned vi = 0; vi != 2; ++vi) {
13414 SmallVector<int, 16> Indices;
13415 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13416 Indices.push_back(2*i+vi);
13417
13418 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13419 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13420 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13421 }
13422 return SV;
13423 }
13424 case NEON::BI__builtin_neon_vzip_v:
13425 case NEON::BI__builtin_neon_vzipq_v: {
13426 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13427 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13428 Value *SV = nullptr;
13429
13430 for (unsigned vi = 0; vi != 2; ++vi) {
13431 SmallVector<int, 16> Indices;
13432 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13433 Indices.push_back((i + vi*e) >> 1);
13434 Indices.push_back(((i + vi*e) >> 1)+e);
13435 }
13436 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13437 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13438 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13439 }
13440 return SV;
13441 }
13442 case NEON::BI__builtin_neon_vqtbl1q_v: {
13443 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13444 Ops, "vtbl1");
13445 }
13446 case NEON::BI__builtin_neon_vqtbl2q_v: {
13447 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13448 Ops, "vtbl2");
13449 }
13450 case NEON::BI__builtin_neon_vqtbl3q_v: {
13451 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13452 Ops, "vtbl3");
13453 }
13454 case NEON::BI__builtin_neon_vqtbl4q_v: {
13455 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13456 Ops, "vtbl4");
13457 }
13458 case NEON::BI__builtin_neon_vqtbx1q_v: {
13459 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13460 Ops, "vtbx1");
13461 }
13462 case NEON::BI__builtin_neon_vqtbx2q_v: {
13463 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13464 Ops, "vtbx2");
13465 }
13466 case NEON::BI__builtin_neon_vqtbx3q_v: {
13467 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13468 Ops, "vtbx3");
13469 }
13470 case NEON::BI__builtin_neon_vqtbx4q_v: {
13471 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13472 Ops, "vtbx4");
13473 }
13474 case NEON::BI__builtin_neon_vsqadd_v:
13475 case NEON::BI__builtin_neon_vsqaddq_v: {
13476 Int = Intrinsic::aarch64_neon_usqadd;
13477 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13478 }
13479 case NEON::BI__builtin_neon_vuqadd_v:
13480 case NEON::BI__builtin_neon_vuqaddq_v: {
13481 Int = Intrinsic::aarch64_neon_suqadd;
13482 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13483 }
13484 }
13485}
13486
13487Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13488 const CallExpr *E) {
13489 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13490 BuiltinID == BPF::BI__builtin_btf_type_id ||
13491 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13492 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13493 "unexpected BPF builtin");
13494
13495   // A sequence number, injected into IR builtin functions, to
13496   // prevent CSE when the only difference between two calls
13497   // may be the attached debuginfo metadata.
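  // For example, two __builtin_preserve_type_info() calls on different types
  // would otherwise lower to intrinsic calls that differ only in their
  // !preserve.access.index metadata; the distinct SeqNum argument keeps CSE
  // from merging them.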
13498 static uint32_t BuiltinSeqNum;
13499
13500 switch (BuiltinID) {
13501 default:
13502 llvm_unreachable("Unexpected BPF builtin");
13503 case BPF::BI__builtin_preserve_field_info: {
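    // Example usage (BPF CO-RE):
    //   unsigned X = __builtin_preserve_field_info(s->a, Kind);
    // where Kind is an integer constant whose meaning (byte offset, byte
    // size, ...) is defined by the BPF backend; it lowers to a call to
    // llvm.bpf.preserve.field.info carrying the field address and Kind.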
13504 const Expr *Arg = E->getArg(0);
13505 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13506
13507 if (!getDebugInfo()) {
13508 CGM.Error(E->getExprLoc(),
13509 "using __builtin_preserve_field_info() without -g");
13510 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13511 : EmitLValue(Arg).emitRawPointer(*this);
13512 }
13513
13514 // Enable underlying preserve_*_access_index() generation.
13515 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13516 IsInPreservedAIRegion = true;
13517 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13518 : EmitLValue(Arg).emitRawPointer(*this);
13519 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13520
13521 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13522 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13523
13524     // Build the IR for the preserve_field_info intrinsic.
13525 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13526 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13527 {FieldAddr->getType()});
13528 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13529 }
13530 case BPF::BI__builtin_btf_type_id:
13531 case BPF::BI__builtin_preserve_type_info: {
13532 if (!getDebugInfo()) {
13533 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13534 return nullptr;
13535 }
13536
13537 const Expr *Arg0 = E->getArg(0);
13538 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13539 Arg0->getType(), Arg0->getExprLoc());
13540
13541 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13542 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13543 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13544
13545 llvm::Function *FnDecl;
13546 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13547 FnDecl = llvm::Intrinsic::getDeclaration(
13548 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13549 else
13550 FnDecl = llvm::Intrinsic::getDeclaration(
13551 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13552 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13553 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13554 return Fn;
13555 }
13556 case BPF::BI__builtin_preserve_enum_value: {
13557 if (!getDebugInfo()) {
13558 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13559 return nullptr;
13560 }
13561
13562 const Expr *Arg0 = E->getArg(0);
13563 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13564 Arg0->getType(), Arg0->getExprLoc());
13565
13566 // Find enumerator
13567 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13568 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13569 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13570 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13571
13572 auto InitVal = Enumerator->getInitVal();
13573 std::string InitValStr;
13574 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13575 InitValStr = std::to_string(InitVal.getSExtValue());
13576 else
13577 InitValStr = std::to_string(InitVal.getZExtValue());
13578 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13579 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13580
13581 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13582 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13583 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13584
13585 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13586 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13587 CallInst *Fn =
13588 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13589 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13590 return Fn;
13591 }
13592 }
13593}
13594
13595llvm::Value *CodeGenFunction::
13596BuildVector(ArrayRef<llvm::Value*> Ops) {
13597   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13598 "Not a power-of-two sized vector!");
13599 bool AllConstants = true;
13600 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13601 AllConstants &= isa<Constant>(Ops[i]);
13602
13603 // If this is a constant vector, create a ConstantVector.
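  // (e.g. BuildVector({i32 0, i32 1, i32 2, i32 3}) folds straight to the
  // constant <4 x i32> <0, 1, 2, 3>; otherwise an insertelement chain is
  // emitted below.)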
13604   if (AllConstants) {
13605     SmallVector<llvm::Constant*, 16> CstOps;
13606     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13607 CstOps.push_back(cast<Constant>(Ops[i]));
13608 return llvm::ConstantVector::get(CstOps);
13609 }
13610
13611 // Otherwise, insertelement the values to build the vector.
13612 Value *Result = llvm::PoisonValue::get(
13613 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13614
13615 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13616 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13617
13618 return Result;
13619}
13620
13621// Convert the mask from an integer type to a vector of i1.
13622static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13623                              unsigned NumElts) {
13624
13625 auto *MaskTy = llvm::FixedVectorType::get(
13626 CGF.Builder.getInt1Ty(),
13627 cast<IntegerType>(Mask->getType())->getBitWidth());
13628 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13629
13630   // If we have fewer than 8 elements, then the starting mask was an i8 and
13631   // we need to extract down to the right number of elements.
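  // For example, with NumElts == 4 an i8 mask of 0b0101 becomes
  // <4 x i1> <1, 0, 1, 0>: bit i of the integer mask controls element i.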
13632 if (NumElts < 8) {
13633 int Indices[4];
13634 for (unsigned i = 0; i != NumElts; ++i)
13635 Indices[i] = i;
13636 MaskVec = CGF.Builder.CreateShuffleVector(
13637 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13638 }
13639 return MaskVec;
13640}
13641
13642static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13643                                 Align Alignment) {
13644 Value *Ptr = Ops[0];
13645
13646 Value *MaskVec = getMaskVecValue(
13647 CGF, Ops[2],
13648 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13649
13650 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13651}
13652
13653static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13654                                Align Alignment) {
13655 llvm::Type *Ty = Ops[1]->getType();
13656 Value *Ptr = Ops[0];
13657
13658 Value *MaskVec = getMaskVecValue(
13659 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13660
13661 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13662}
13663
13664static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13665                                ArrayRef<Value *> Ops) {
13666 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13667 Value *Ptr = Ops[0];
13668
13669 Value *MaskVec = getMaskVecValue(
13670 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13671
13672 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13673 ResultTy);
13674 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13675}
13676
13677static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13678                                    ArrayRef<Value *> Ops,
13679                                    bool IsCompress) {
13680 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13681
13682 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13683
13684 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13685 : Intrinsic::x86_avx512_mask_expand;
13686 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13687 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13688}
13689
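// Emit a compressing store via llvm.masked.compressstore: Ops[0] is the
// destination pointer, Ops[1] the data vector, and Ops[2] the integer mask.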
13690static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13691 ArrayRef<Value *> Ops) {
13692 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13693 Value *Ptr = Ops[0];
13694
13695 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13696
13697 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13698 ResultTy);
13699 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13700}
13701
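// Emit a bitwise logic operation on two integer masks: both operands are
// converted to <N x i1>, the LHS is optionally inverted first (for the
// "andn"-style forms), the binary operator Opc is applied, and the result is
// bitcast back to the original integer mask type.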
13702static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13703 ArrayRef<Value *> Ops,
13704 bool InvertLHS = false) {
13705 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13706 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13707 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13708
13709 if (InvertLHS)
13710 LHS = CGF.Builder.CreateNot(LHS);
13711
13712 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13713 Ops[0]->getType());
13714}
13715
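// Emit a vector funnel shift through llvm.fshl/llvm.fshr. A scalar shift
// amount is splatted to the vector type first; rotates reuse this helper by
// passing the same value for both data operands.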
13716static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13717 Value *Amt, bool IsRight) {
13718 llvm::Type *Ty = Op0->getType();
13719
13720 // The amount may be a scalar immediate, in which case create a splat vector.
13721 // Funnel shift amounts are treated modulo the element bit width, and the
13722 // types are all power-of-2 sized, so we only care about the lowest log2 bits anyway.
13723 if (Amt->getType() != Ty) {
13724 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13725 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13726 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13727 }
13728
13729 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13730 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13731 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13732}
13733
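// Emit the XOP vpcom/vpcomu builtins: the immediate in Ops[2] selects the
// comparison predicate (6 and 7 encode constant false/true), and the i1
// compare result is sign-extended back to the operand vector type.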
13735 bool IsSigned) {
13736 Value *Op0 = Ops[0];
13737 Value *Op1 = Ops[1];
13738 llvm::Type *Ty = Op0->getType();
13739 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13740
13741 CmpInst::Predicate Pred;
13742 switch (Imm) {
13743 case 0x0:
13744 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13745 break;
13746 case 0x1:
13747 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13748 break;
13749 case 0x2:
13750 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13751 break;
13752 case 0x3:
13753 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13754 break;
13755 case 0x4:
13756 Pred = ICmpInst::ICMP_EQ;
13757 break;
13758 case 0x5:
13759 Pred = ICmpInst::ICMP_NE;
13760 break;
13761 case 0x6:
13762 return llvm::Constant::getNullValue(Ty); // FALSE
13763 case 0x7:
13764 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13765 default:
13766 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13767 }
13768
13769 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13770 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13771 return Res;
13772}
13773
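// Emit a mask-predicated vector blend. An all-ones constant mask simply
// returns Op0; otherwise the integer mask becomes <N x i1> and selects
// per-element between Op0 and Op1, roughly:
//   %m = bitcast i8 %mask to <8 x i1>
//   %r = select <8 x i1> %m, <8 x float> %op0, <8 x float> %op1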
13774static Value *EmitX86Select(CodeGenFunction &CGF,
13775 Value *Mask, Value *Op0, Value *Op1) {
13776
13777 // If the mask is all ones just return first argument.
13778 if (const auto *C = dyn_cast<Constant>(Mask))
13779 if (C->isAllOnesValue())
13780 return Op0;
13781
13782 Mask = getMaskVecValue(
13783 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13784
13785 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13786}
13787
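// Scalar variant of the masked blend above: only bit 0 of the mask is
// extracted and used to choose between Op0 and Op1.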
13788static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13789 Value *Mask, Value *Op0, Value *Op1) {
13790 // If the mask is all ones just return first argument.
13791 if (const auto *C = dyn_cast<Constant>(Mask))
13792 if (C->isAllOnesValue())
13793 return Op0;
13794
13795 auto *MaskTy = llvm::FixedVectorType::get(
13796 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13797 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13798 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13799 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13800}
13801
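// Pack a <N x i1> compare result into the integer mask ABI form: AND in the
// optional input mask, widen to at least 8 elements by shuffling in zero
// lanes (e.g. a <4 x i1> result occupies the low 4 bits of an i8), then
// bitcast to an integer of width max(NumElts, 8).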
13802static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13803 unsigned NumElts, Value *MaskIn) {
13804 if (MaskIn) {
13805 const auto *C = dyn_cast<Constant>(MaskIn);
13806 if (!C || !C->isAllOnesValue())
13807 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13808 }
13809
13810 if (NumElts < 8) {
13811 int Indices[8];
13812 for (unsigned i = 0; i != NumElts; ++i)
13813 Indices[i] = i;
13814 for (unsigned i = NumElts; i != 8; ++i)
13815 Indices[i] = i % NumElts + NumElts;
13816 Cmp = CGF.Builder.CreateShuffleVector(
13817 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13818 }
13819
13820 return CGF.Builder.CreateBitCast(Cmp,
13821 IntegerType::get(CGF.getLLVMContext(),
13822 std::max(NumElts, 8U)));
13823}
13824
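// Emit an AVX-512 integer compare-to-mask. CC selects the predicate, with
// CC==3 and CC==7 producing constant false/true masks; an optional fourth
// operand is ANDed in, and the result is packed by EmitX86MaskedCompareResult.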
13825static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13826 bool Signed, ArrayRef<Value *> Ops) {
13827 assert((Ops.size() == 2 || Ops.size() == 4) &&
13828 "Unexpected number of arguments");
13829 unsigned NumElts =
13830 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13831 Value *Cmp;
13832
13833 if (CC == 3) {
13834 Cmp = Constant::getNullValue(
13835 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13836 } else if (CC == 7) {
13837 Cmp = Constant::getAllOnesValue(
13838 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13839 } else {
13840 ICmpInst::Predicate Pred;
13841 switch (CC) {
13842 default: llvm_unreachable("Unknown condition code");
13843 case 0: Pred = ICmpInst::ICMP_EQ; break;
13844 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13845 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13846 case 4: Pred = ICmpInst::ICMP_NE; break;
13847 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13848 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13849 }
13850 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13851 }
13852
13853 Value *MaskIn = nullptr;
13854 if (Ops.size() == 4)
13855 MaskIn = Ops[3];
13856
13857 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13858}
13859
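// Lower the cvt*2mask builtins: each element is compared (signed) against
// zero, so the resulting mask bit is simply the element's sign bit.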
13860static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13861 Value *Zero = Constant::getNullValue(In->getType());
13862 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13863}
13864
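// Emit the masked integer-to-floating-point conversions: a non-default
// rounding mode (Ops[3] != 4) uses the AVX-512 sitofp/uitofp rounding
// intrinsics, otherwise a plain sitofp/uitofp instruction, and the result is
// blended with Ops[1] under the mask in Ops[2].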
13865static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13866 ArrayRef<Value *> Ops, bool IsSigned) {
13867 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13868 llvm::Type *Ty = Ops[1]->getType();
13869
13870 Value *Res;
13871 if (Rnd != 4) {
13872 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13873 : Intrinsic::x86_avx512_uitofp_round;
13874 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13875 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13876 } else {
13877 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13878 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13879 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13880 }
13881
13882 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13883}
13884
13885// Lowers X86 FMA intrinsics to IR.
13886static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13887 ArrayRef<Value *> Ops, unsigned BuiltinID,
13888 bool IsAddSub) {
13889
13890 bool Subtract = false;
13891 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13892 switch (BuiltinID) {
13893 default: break;
13894 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13895 Subtract = true;
13896 [[fallthrough]];
13897 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13898 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13899 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13900 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13901 break;
13902 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13903 Subtract = true;
13904 [[fallthrough]];
13905 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13906 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13907 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13908 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13909 break;
13910 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13911 Subtract = true;
13912 [[fallthrough]];
13913 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13914 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13915 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13916 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13917 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13918 Subtract = true;
13919 [[fallthrough]];
13920 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13921 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13922 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13923 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13924 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13925 Subtract = true;
13926 [[fallthrough]];
13927 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13928 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13929 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13930 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13931 break;
13932 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13933 Subtract = true;
13934 [[fallthrough]];
13935 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13936 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13937 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13938 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13939 break;
13940 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
13941 Subtract = true;
13942 [[fallthrough]];
13943 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
13944 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
13945 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
13946 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
13947 break;
13948 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
13949 Subtract = true;
13950 [[fallthrough]];
13951 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
13952 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
13953 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
13954 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
13955 break;
13956 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
13957 Subtract = true;
13958 [[fallthrough]];
13959 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
13960 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
13961 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
13962 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
13963 break;
13964 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
13965 Subtract = true;
13966 [[fallthrough]];
13967 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
13968 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
13969 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
13970 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
13971 break;
13972 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
13973 Subtract = true;
13974 [[fallthrough]];
13975 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
13976 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
13977 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
13978 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
13979 break;
13980 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
13981 Subtract = true;
13982 [[fallthrough]];
13983 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
13984 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
13985 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
13986 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
13987 break;
13988 }
13989
13990 Value *A = Ops[0];
13991 Value *B = Ops[1];
13992 Value *C = Ops[2];
13993
13994 if (Subtract)
13995 C = CGF.Builder.CreateFNeg(C);
13996
13997 Value *Res;
13998
13999 // Use the target intrinsic when rounding is not _MM_FROUND_CUR_DIRECTION (4, i.e. no explicit rounding) or for the add/sub forms; otherwise emit a generic FMA below.
14000 if (IID != Intrinsic::not_intrinsic &&
14001 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14002 IsAddSub)) {
14003 Function *Intr = CGF.CGM.getIntrinsic(IID);
14004 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14005 } else {
14006 llvm::Type *Ty = A->getType();
14007 Function *FMA;
14008 if (CGF.Builder.getIsFPConstrained()) {
14009 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14010 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14011 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14012 } else {
14013 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14014 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14015 }
14016 }
14017
14018 // Handle any required masking.
14019 Value *MaskFalseVal = nullptr;
14020 switch (BuiltinID) {
14021 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14022 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14023 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14024 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14025 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14026 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14027 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14028 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14029 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14030 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14031 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14032 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14033 MaskFalseVal = Ops[0];
14034 break;
14035 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14036 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14037 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14038 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14039 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14040 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14041 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14042 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14043 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14044 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14045 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14046 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14047 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14048 break;
14049 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14050 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14051 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14052 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14053 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14054 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14055 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14056 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14057 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14058 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14059 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14060 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14061 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14062 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14063 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14064 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14065 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14066 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14067 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14068 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14069 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14070 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14071 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14072 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14073 MaskFalseVal = Ops[2];
14074 break;
14075 }
14076
14077 if (MaskFalseVal)
14078 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14079
14080 return Res;
14081}
14082
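// Emit the scalar (ss/sd/sh) FMA builtins: element 0 is extracted from each
// operand, the multiply-add is emitted (via a rounding intrinsic when a
// non-default rounding mode is given), masking is applied when present, and
// the result is inserted back into element 0 of Upper.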
14083static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14084 MutableArrayRef<Value *> Ops, Value *Upper,
14085 bool ZeroMask = false, unsigned PTIdx = 0,
14086 bool NegAcc = false) {
14087 unsigned Rnd = 4;
14088 if (Ops.size() > 4)
14089 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14090
14091 if (NegAcc)
14092 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14093
14094 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14095 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14096 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14097 Value *Res;
14098 if (Rnd != 4) {
14099 Intrinsic::ID IID;
14100
14101 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14102 case 16:
14103 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14104 break;
14105 case 32:
14106 IID = Intrinsic::x86_avx512_vfmadd_f32;
14107 break;
14108 case 64:
14109 IID = Intrinsic::x86_avx512_vfmadd_f64;
14110 break;
14111 default:
14112 llvm_unreachable("Unexpected size");
14113 }
14114 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14115 {Ops[0], Ops[1], Ops[2], Ops[4]});
14116 } else if (CGF.Builder.getIsFPConstrained()) {
14117 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14118 Function *FMA = CGF.CGM.getIntrinsic(
14119 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14120 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14121 } else {
14122 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14123 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14124 }
14125 // If we have more than 3 arguments, we need to do masking.
14126 if (Ops.size() > 3) {
14127 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14128 : Ops[PTIdx];
14129
14130 // If we negated the accumulator and it is also the PassThru value, we need
14131 // to bypass the negate. Conveniently, Upper should be the same thing in this
14132 // case.
14133 if (NegAcc && PTIdx == 2)
14134 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14135
14136 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14137 }
14138 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14139}
14140
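// Emit pmuldq/pmuludq: the vXi32 operands are reinterpreted as vXi64 and the
// low 32 bits of each element are sign- or zero-extended before the 64-bit
// multiply.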
14141static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14142 ArrayRef<Value *> Ops) {
14143 llvm::Type *Ty = Ops[0]->getType();
14144 // Arguments have a vXi32 type so cast to vXi64.
14145 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14146 Ty->getPrimitiveSizeInBits() / 64);
14147 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14148 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14149
14150 if (IsSigned) {
14151 // Shift left then arithmetic shift right.
14152 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14153 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14154 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14155 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14156 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14157 } else {
14158 // Clear the upper bits.
14159 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14160 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14161 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14162 }
14163
14164 return CGF.Builder.CreateMul(LHS, RHS);
14165}
14166
14167// Emit a masked pternlog intrinsic. This only exists because the header has to
14168// use a macro and we aren't able to pass the input argument to a pternlog
14169// builtin and a select builtin without evaluating it twice.
14170static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14171 ArrayRef<Value *> Ops) {
14172 llvm::Type *Ty = Ops[0]->getType();
14173
14174 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14175 unsigned EltWidth = Ty->getScalarSizeInBits();
14176 Intrinsic::ID IID;
14177 if (VecWidth == 128 && EltWidth == 32)
14178 IID = Intrinsic::x86_avx512_pternlog_d_128;
14179 else if (VecWidth == 256 && EltWidth == 32)
14180 IID = Intrinsic::x86_avx512_pternlog_d_256;
14181 else if (VecWidth == 512 && EltWidth == 32)
14182 IID = Intrinsic::x86_avx512_pternlog_d_512;
14183 else if (VecWidth == 128 && EltWidth == 64)
14184 IID = Intrinsic::x86_avx512_pternlog_q_128;
14185 else if (VecWidth == 256 && EltWidth == 64)
14186 IID = Intrinsic::x86_avx512_pternlog_q_256;
14187 else if (VecWidth == 512 && EltWidth == 64)
14188 IID = Intrinsic::x86_avx512_pternlog_q_512;
14189 else
14190 llvm_unreachable("Unexpected intrinsic");
14191
14192 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14193 Ops.drop_back());
14194 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14195 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14196}
14197
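// Lower the cvtmask2* builtins: the integer mask is turned into <N x i1> and
// sign-extended to the destination vector type, giving all-ones or all-zero
// elements.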
14198static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14199 llvm::Type *DstTy) {
14200 unsigned NumberOfElements =
14201 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14202 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14203 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14204}
14205
14206Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14207 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14208 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14209 return EmitX86CpuIs(CPUStr);
14210}
14211
14212// Convert F16 halves to floats.
14213static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14214 ArrayRef<Value *> Ops,
14215 llvm::Type *DstTy) {
14216 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14217 "Unknown cvtph2ps intrinsic");
14218
14219 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14220 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14221 Function *F =
14222 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14223 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14224 }
14225
14226 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14227 Value *Src = Ops[0];
14228
14229 // Extract the subvector.
14230 if (NumDstElts !=
14231 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14232 assert(NumDstElts == 4 && "Unexpected vector size");
14233 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14234 }
14235
14236 // Bitcast from vXi16 to vXf16.
14237 auto *HalfTy = llvm::FixedVectorType::get(
14238 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14239 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14240
14241 // Perform the fp-extension.
14242 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14243
14244 if (Ops.size() >= 3)
14245 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14246 return Res;
14247}
14248
14249Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14250
14251 llvm::Type *Int32Ty = Builder.getInt32Ty();
14252
14253 // Matching the struct layout from the compiler-rt/libgcc structure that is
14254 // filled in:
14255 // unsigned int __cpu_vendor;
14256 // unsigned int __cpu_type;
14257 // unsigned int __cpu_subtype;
14258 // unsigned int __cpu_features[1];
14259 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14260 llvm::ArrayType::get(Int32Ty, 1));
14261
14262 // Grab the global __cpu_model.
14263 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14264 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14265
14266 // Calculate the index needed to access the correct field based on the
14267 // range. Also adjust the expected value.
14268 unsigned Index;
14269 unsigned Value;
14270 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14271#define X86_VENDOR(ENUM, STRING) \
14272 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14273#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14274 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14275#define X86_CPU_TYPE(ENUM, STR) \
14276 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14277#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14278 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14279#define X86_CPU_SUBTYPE(ENUM, STR) \
14280 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14281#include "llvm/TargetParser/X86TargetParser.def"
14282 .Default({0, 0});
14283 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14284
14285 // Grab the appropriate field from __cpu_model.
14286 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14287 ConstantInt::get(Int32Ty, Index)};
14288 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14289 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14290 CharUnits::fromQuantity(4));
14291
14292 // Check the value of the field against the requested value.
14293 return Builder.CreateICmpEQ(CpuValue,
14294 llvm::ConstantInt::get(Int32Ty, Value));
14295}
14296
14297Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14298 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14299 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14300 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14301 return Builder.getFalse();
14302 return EmitX86CpuSupports(FeatureStr);
14303}
14304
14305Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14306 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14307}
14308
14309llvm::Value *
14310CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14311 Value *Result = Builder.getTrue();
14312 if (FeatureMask[0] != 0) {
14313 // Matching the struct layout from the compiler-rt/libgcc structure that is
14314 // filled in:
14315 // unsigned int __cpu_vendor;
14316 // unsigned int __cpu_type;
14317 // unsigned int __cpu_subtype;
14318 // unsigned int __cpu_features[1];
14319 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14320 llvm::ArrayType::get(Int32Ty, 1));
14321
14322 // Grab the global __cpu_model.
14323 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14324 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14325
14326 // Grab the first (0th) element from the field __cpu_features off of the
14327 // global in the struct STy.
14328 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14329 Builder.getInt32(0)};
14330 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14331 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14332 CharUnits::fromQuantity(4));
14333
14334 // Check the value of the bit corresponding to the feature requested.
14335 Value *Mask = Builder.getInt32(FeatureMask[0]);
14336 Value *Bitset = Builder.CreateAnd(Features, Mask);
14337 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14338 Result = Builder.CreateAnd(Result, Cmp);
14339 }
14340
14341 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14342 llvm::Constant *CpuFeatures2 =
14343 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14344 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14345 for (int i = 1; i != 4; ++i) {
14346 const uint32_t M = FeatureMask[i];
14347 if (!M)
14348 continue;
14349 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14350 Value *Features = Builder.CreateAlignedLoad(
14351 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14352 CharUnits::fromQuantity(4));
14353 // Check the value of the bit corresponding to the feature requested.
14354 Value *Mask = Builder.getInt32(M);
14355 Value *Bitset = Builder.CreateAnd(Features, Mask);
14356 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14357 Result = Builder.CreateAnd(Result, Cmp);
14358 }
14359
14360 return Result;
14361}
14362
14363Value *CodeGenFunction::EmitAArch64CpuInit() {
14364 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14365 llvm::FunctionCallee Func =
14366 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14367 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14368 cast<llvm::GlobalValue>(Func.getCallee())
14369 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14370 return Builder.CreateCall(Func);
14371}
14372
14373Value *CodeGenFunction::EmitRISCVCpuInit() {
14374 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14375 llvm::FunctionCallee Func =
14376 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14377 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14378 CalleeGV->setDSOLocal(true);
14379 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14380 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14381}
14382
14383Value *CodeGenFunction::EmitX86CpuInit() {
14384 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14385 /*Variadic*/ false);
14386 llvm::FunctionCallee Func =
14387 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14388 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14389 cast<llvm::GlobalValue>(Func.getCallee())
14390 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14391 return Builder.CreateCall(Func);
14392}
14393
14394Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14395 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14396 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14397 llvm::SmallVector<StringRef, 8> Features;
14398 ArgStr.split(Features, "+");
14399 for (auto &Feature : Features) {
14400 Feature = Feature.trim();
14401 if (!llvm::AArch64::parseFMVExtension(Feature))
14402 return Builder.getFalse();
14403 if (Feature != "default")
14404 Features.push_back(Feature);
14405 }
14406 return EmitAArch64CpuSupports(Features);
14407}
14408
14409llvm::Value *
14410CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14411 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14412 Value *Result = Builder.getTrue();
14413 if (FeaturesMask != 0) {
14414 // Get features from structure in runtime library
14415 // struct {
14416 // unsigned long long features;
14417 // } __aarch64_cpu_features;
14418 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14419 llvm::Constant *AArch64CPUFeatures =
14420 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14421 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14422 llvm::Value *CpuFeatures = Builder.CreateGEP(
14423 STy, AArch64CPUFeatures,
14424 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14425 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14426 CharUnits::fromQuantity(8));
14427 Value *Mask = Builder.getInt64(FeaturesMask);
14428 Value *Bitset = Builder.CreateAnd(Features, Mask);
14429 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14430 Result = Builder.CreateAnd(Result, Cmp);
14431 }
14432 return Result;
14433}
14434
14435Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
14436
14437 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14438 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14439 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14440 return Builder.getFalse();
14441
14442 // Note: We are making an unchecked assumption that the size of the
14443 // feature array is >= 1. This holds for any version of compiler-rt
14444 // which defines this interface.
14445 llvm::ArrayType *ArrayOfInt64Ty = llvm::ArrayType::get(Int64Ty, 1);
14446 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14447 llvm::Constant *RISCVFeaturesBits =
14448 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
14449 auto *GV = cast<llvm::GlobalValue>(RISCVFeaturesBits);
14450 GV->setDSOLocal(true);
14451
14452 auto LoadFeatureBit = [&](unsigned Index) {
14453 // Create GEP then load.
14454 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
14455 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
14456 IndexVal};
14457 Value *Ptr =
14458 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
14459 Value *FeaturesBit =
14460 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
14461 return FeaturesBit;
14462 };
14463
14464 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(FeatureStr);
14465 assert(BitPos != -1 && "validation should have rejected this feature");
14466 Value *MaskV = Builder.getInt64(1ULL << BitPos);
14467 Value *Bitset = Builder.CreateAnd(LoadFeatureBit(GroupID), MaskV);
14468 return Builder.CreateICmpEQ(Bitset, MaskV);
14469}
14470
14471Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14472 const CallExpr *E) {
14473 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14474 return EmitX86CpuIs(E);
14475 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14476 return EmitX86CpuSupports(E);
14477 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14478 return EmitX86CpuInit();
14479
14480 // Handle MSVC intrinsics before argument evaluation to prevent double
14481 // evaluation.
14482 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14483 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14484
14485 SmallVector<Value*, 4> Ops;
14486 bool IsMaskFCmp = false;
14487 bool IsConjFMA = false;
14488
14489 // Find out if any arguments are required to be integer constant expressions.
14490 unsigned ICEArguments = 0;
14491 ASTContext::GetBuiltinTypeError Error;
14492 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14493 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14494
14495 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14496 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14497 }
14498
14499 // These exist so that the builtin that takes an immediate can be bounds
14500 // checked by clang to avoid passing bad immediates to the backend. Since
14501 // AVX has a larger immediate than SSE we would need separate builtins to
14502 // do the different bounds checking. Rather than create a clang-specific,
14503 // SSE-only builtin, this implements eight separate builtins to match the
14504 // gcc implementation.
14505 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14506 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14507 llvm::Function *F = CGM.getIntrinsic(ID);
14508 return Builder.CreateCall(F, Ops);
14509 };
14510
14511 // For the vector forms of FP comparisons, translate the builtins directly to
14512 // IR.
14513 // TODO: The builtins could be removed if the SSE header files used vector
14514 // extension comparisons directly (vector ordered/unordered may need
14515 // additional support via __builtin_isnan()).
14516 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14517 bool IsSignaling) {
14518 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14519 Value *Cmp;
14520 if (IsSignaling)
14521 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14522 else
14523 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14524 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14525 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14526 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14527 return Builder.CreateBitCast(Sext, FPVecTy);
14528 };
14529
14530 switch (BuiltinID) {
14531 default: return nullptr;
14532 case X86::BI_mm_prefetch: {
14533 Value *Address = Ops[0];
14534 ConstantInt *C = cast<ConstantInt>(Ops[1]);
14535 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14536 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14537 Value *Data = ConstantInt::get(Int32Ty, 1);
14538 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14539 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14540 }
14541 case X86::BI_mm_clflush: {
14542 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14543 Ops[0]);
14544 }
14545 case X86::BI_mm_lfence: {
14546 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14547 }
14548 case X86::BI_mm_mfence: {
14549 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14550 }
14551 case X86::BI_mm_sfence: {
14552 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14553 }
14554 case X86::BI_mm_pause: {
14555 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14556 }
14557 case X86::BI__rdtsc: {
14558 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14559 }
14560 case X86::BI__builtin_ia32_rdtscp: {
14561 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14562 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14563 Ops[0]);
14564 return Builder.CreateExtractValue(Call, 0);
14565 }
14566 case X86::BI__builtin_ia32_lzcnt_u16:
14567 case X86::BI__builtin_ia32_lzcnt_u32:
14568 case X86::BI__builtin_ia32_lzcnt_u64: {
14569 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14570 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14571 }
14572 case X86::BI__builtin_ia32_tzcnt_u16:
14573 case X86::BI__builtin_ia32_tzcnt_u32:
14574 case X86::BI__builtin_ia32_tzcnt_u64: {
14575 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14576 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14577 }
14578 case X86::BI__builtin_ia32_undef128:
14579 case X86::BI__builtin_ia32_undef256:
14580 case X86::BI__builtin_ia32_undef512:
14581 // The x86 definition of "undef" is not the same as the LLVM definition
14582 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14583 // IR optimizer and backend.
14584 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14585 // value, we should use that here instead of a zero.
14586 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14587 case X86::BI__builtin_ia32_vec_ext_v4hi:
14588 case X86::BI__builtin_ia32_vec_ext_v16qi:
14589 case X86::BI__builtin_ia32_vec_ext_v8hi:
14590 case X86::BI__builtin_ia32_vec_ext_v4si:
14591 case X86::BI__builtin_ia32_vec_ext_v4sf:
14592 case X86::BI__builtin_ia32_vec_ext_v2di:
14593 case X86::BI__builtin_ia32_vec_ext_v32qi:
14594 case X86::BI__builtin_ia32_vec_ext_v16hi:
14595 case X86::BI__builtin_ia32_vec_ext_v8si:
14596 case X86::BI__builtin_ia32_vec_ext_v4di: {
14597 unsigned NumElts =
14598 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14599 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14600 Index &= NumElts - 1;
14601 // These builtins exist so we can ensure the index is an ICE and in range.
14602 // Otherwise we could just do this in the header file.
14603 return Builder.CreateExtractElement(Ops[0], Index);
14604 }
14605 case X86::BI__builtin_ia32_vec_set_v4hi:
14606 case X86::BI__builtin_ia32_vec_set_v16qi:
14607 case X86::BI__builtin_ia32_vec_set_v8hi:
14608 case X86::BI__builtin_ia32_vec_set_v4si:
14609 case X86::BI__builtin_ia32_vec_set_v2di:
14610 case X86::BI__builtin_ia32_vec_set_v32qi:
14611 case X86::BI__builtin_ia32_vec_set_v16hi:
14612 case X86::BI__builtin_ia32_vec_set_v8si:
14613 case X86::BI__builtin_ia32_vec_set_v4di: {
14614 unsigned NumElts =
14615 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14616 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14617 Index &= NumElts - 1;
14618 // These builtins exist so we can ensure the index is an ICE and in range.
14619 // Otherwise we could just do this in the header file.
14620 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14621 }
14622 case X86::BI_mm_setcsr:
14623 case X86::BI__builtin_ia32_ldmxcsr: {
14624 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14625 Builder.CreateStore(Ops[0], Tmp);
14626 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14627 Tmp.getPointer());
14628 }
14629 case X86::BI_mm_getcsr:
14630 case X86::BI__builtin_ia32_stmxcsr: {
14631 RawAddress Tmp = CreateMemTemp(E->getType());
14632 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14633 Tmp.getPointer());
14634 return Builder.CreateLoad(Tmp, "stmxcsr");
14635 }
14636 case X86::BI__builtin_ia32_xsave:
14637 case X86::BI__builtin_ia32_xsave64:
14638 case X86::BI__builtin_ia32_xrstor:
14639 case X86::BI__builtin_ia32_xrstor64:
14640 case X86::BI__builtin_ia32_xsaveopt:
14641 case X86::BI__builtin_ia32_xsaveopt64:
14642 case X86::BI__builtin_ia32_xrstors:
14643 case X86::BI__builtin_ia32_xrstors64:
14644 case X86::BI__builtin_ia32_xsavec:
14645 case X86::BI__builtin_ia32_xsavec64:
14646 case X86::BI__builtin_ia32_xsaves:
14647 case X86::BI__builtin_ia32_xsaves64:
14648 case X86::BI__builtin_ia32_xsetbv:
14649 case X86::BI_xsetbv: {
14650 Intrinsic::ID ID;
14651#define INTRINSIC_X86_XSAVE_ID(NAME) \
14652 case X86::BI__builtin_ia32_##NAME: \
14653 ID = Intrinsic::x86_##NAME; \
14654 break
14655 switch (BuiltinID) {
14656 default: llvm_unreachable("Unsupported intrinsic!");
14657 INTRINSIC_X86_XSAVE_ID(xsave);
14658 INTRINSIC_X86_XSAVE_ID(xsave64);
14659 INTRINSIC_X86_XSAVE_ID(xrstor);
14660 INTRINSIC_X86_XSAVE_ID(xrstor64);
14661 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14662 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14663 INTRINSIC_X86_XSAVE_ID(xrstors);
14664 INTRINSIC_X86_XSAVE_ID(xrstors64);
14665 INTRINSIC_X86_XSAVE_ID(xsavec);
14666 INTRINSIC_X86_XSAVE_ID(xsavec64);
14667 INTRINSIC_X86_XSAVE_ID(xsaves);
14668 INTRINSIC_X86_XSAVE_ID(xsaves64);
14669 INTRINSIC_X86_XSAVE_ID(xsetbv);
14670 case X86::BI_xsetbv:
14671 ID = Intrinsic::x86_xsetbv;
14672 break;
14673 }
14674#undef INTRINSIC_X86_XSAVE_ID
14675 Value *Mhi = Builder.CreateTrunc(
14676 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14677 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14678 Ops[1] = Mhi;
14679 Ops.push_back(Mlo);
14680 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14681 }
14682 case X86::BI__builtin_ia32_xgetbv:
14683 case X86::BI_xgetbv:
14684 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14685 case X86::BI__builtin_ia32_storedqudi128_mask:
14686 case X86::BI__builtin_ia32_storedqusi128_mask:
14687 case X86::BI__builtin_ia32_storedquhi128_mask:
14688 case X86::BI__builtin_ia32_storedquqi128_mask:
14689 case X86::BI__builtin_ia32_storeupd128_mask:
14690 case X86::BI__builtin_ia32_storeups128_mask:
14691 case X86::BI__builtin_ia32_storedqudi256_mask:
14692 case X86::BI__builtin_ia32_storedqusi256_mask:
14693 case X86::BI__builtin_ia32_storedquhi256_mask:
14694 case X86::BI__builtin_ia32_storedquqi256_mask:
14695 case X86::BI__builtin_ia32_storeupd256_mask:
14696 case X86::BI__builtin_ia32_storeups256_mask:
14697 case X86::BI__builtin_ia32_storedqudi512_mask:
14698 case X86::BI__builtin_ia32_storedqusi512_mask:
14699 case X86::BI__builtin_ia32_storedquhi512_mask:
14700 case X86::BI__builtin_ia32_storedquqi512_mask:
14701 case X86::BI__builtin_ia32_storeupd512_mask:
14702 case X86::BI__builtin_ia32_storeups512_mask:
14703 return EmitX86MaskedStore(*this, Ops, Align(1));
14704
14705 case X86::BI__builtin_ia32_storesh128_mask:
14706 case X86::BI__builtin_ia32_storess128_mask:
14707 case X86::BI__builtin_ia32_storesd128_mask:
14708 return EmitX86MaskedStore(*this, Ops, Align(1));
14709
14710 case X86::BI__builtin_ia32_vpopcntb_128:
14711 case X86::BI__builtin_ia32_vpopcntd_128:
14712 case X86::BI__builtin_ia32_vpopcntq_128:
14713 case X86::BI__builtin_ia32_vpopcntw_128:
14714 case X86::BI__builtin_ia32_vpopcntb_256:
14715 case X86::BI__builtin_ia32_vpopcntd_256:
14716 case X86::BI__builtin_ia32_vpopcntq_256:
14717 case X86::BI__builtin_ia32_vpopcntw_256:
14718 case X86::BI__builtin_ia32_vpopcntb_512:
14719 case X86::BI__builtin_ia32_vpopcntd_512:
14720 case X86::BI__builtin_ia32_vpopcntq_512:
14721 case X86::BI__builtin_ia32_vpopcntw_512: {
14722 llvm::Type *ResultType = ConvertType(E->getType());
14723 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14724 return Builder.CreateCall(F, Ops);
14725 }
14726 case X86::BI__builtin_ia32_cvtmask2b128:
14727 case X86::BI__builtin_ia32_cvtmask2b256:
14728 case X86::BI__builtin_ia32_cvtmask2b512:
14729 case X86::BI__builtin_ia32_cvtmask2w128:
14730 case X86::BI__builtin_ia32_cvtmask2w256:
14731 case X86::BI__builtin_ia32_cvtmask2w512:
14732 case X86::BI__builtin_ia32_cvtmask2d128:
14733 case X86::BI__builtin_ia32_cvtmask2d256:
14734 case X86::BI__builtin_ia32_cvtmask2d512:
14735 case X86::BI__builtin_ia32_cvtmask2q128:
14736 case X86::BI__builtin_ia32_cvtmask2q256:
14737 case X86::BI__builtin_ia32_cvtmask2q512:
14738 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14739
14740 case X86::BI__builtin_ia32_cvtb2mask128:
14741 case X86::BI__builtin_ia32_cvtb2mask256:
14742 case X86::BI__builtin_ia32_cvtb2mask512:
14743 case X86::BI__builtin_ia32_cvtw2mask128:
14744 case X86::BI__builtin_ia32_cvtw2mask256:
14745 case X86::BI__builtin_ia32_cvtw2mask512:
14746 case X86::BI__builtin_ia32_cvtd2mask128:
14747 case X86::BI__builtin_ia32_cvtd2mask256:
14748 case X86::BI__builtin_ia32_cvtd2mask512:
14749 case X86::BI__builtin_ia32_cvtq2mask128:
14750 case X86::BI__builtin_ia32_cvtq2mask256:
14751 case X86::BI__builtin_ia32_cvtq2mask512:
14752 return EmitX86ConvertToMask(*this, Ops[0]);
14753
14754 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14755 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14756 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14757 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14758 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14759 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14760 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
14761 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
14762 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
14763 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
14764 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
14765 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
14766 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14767 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14768 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14769 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14770 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14771 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14772 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14773 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
14774 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
14775 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
14776 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
14777 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
14778 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
14779 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14780
14781 case X86::BI__builtin_ia32_vfmaddss3:
14782 case X86::BI__builtin_ia32_vfmaddsd3:
14783 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14784 case X86::BI__builtin_ia32_vfmaddss3_mask:
14785 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14786 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14787 case X86::BI__builtin_ia32_vfmaddss:
14788 case X86::BI__builtin_ia32_vfmaddsd:
14789 return EmitScalarFMAExpr(*this, E, Ops,
14790 Constant::getNullValue(Ops[0]->getType()));
14791 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14792 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14793 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14794 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14795 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14796 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14797 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14798 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14799 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14800 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14801 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14802 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14803 /*NegAcc*/ true);
14804 case X86::BI__builtin_ia32_vfmaddph:
14805 case X86::BI__builtin_ia32_vfmaddps:
14806 case X86::BI__builtin_ia32_vfmaddpd:
14807 case X86::BI__builtin_ia32_vfmaddph256:
14808 case X86::BI__builtin_ia32_vfmaddps256:
14809 case X86::BI__builtin_ia32_vfmaddpd256:
14810 case X86::BI__builtin_ia32_vfmaddph512_mask:
14811 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14812 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14813 case X86::BI__builtin_ia32_vfmaddps512_mask:
14814 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14815 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14816 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14817 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14818 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14819 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14820 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14821 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14822 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
14823 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14824 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14825 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
14826 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14827 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14828 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14829 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14830 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14831 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14832 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14833 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14834 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14835 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14836 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14837 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14838 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14839 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14840 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14841 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14842 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14843 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14844 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14845 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14846 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14847 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14848 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14849 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14850 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14851 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14852 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14853 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14854 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14855 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14856 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14857 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14858 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14859 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14860
14861 case X86::BI__builtin_ia32_movdqa32store128_mask:
14862 case X86::BI__builtin_ia32_movdqa64store128_mask:
14863 case X86::BI__builtin_ia32_storeaps128_mask:
14864 case X86::BI__builtin_ia32_storeapd128_mask:
14865 case X86::BI__builtin_ia32_movdqa32store256_mask:
14866 case X86::BI__builtin_ia32_movdqa64store256_mask:
14867 case X86::BI__builtin_ia32_storeaps256_mask:
14868 case X86::BI__builtin_ia32_storeapd256_mask:
14869 case X86::BI__builtin_ia32_movdqa32store512_mask:
14870 case X86::BI__builtin_ia32_movdqa64store512_mask:
14871 case X86::BI__builtin_ia32_storeaps512_mask:
14872 case X86::BI__builtin_ia32_storeapd512_mask:
14873 return EmitX86MaskedStore(
14874 *this, Ops,
14875 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14876
14877 case X86::BI__builtin_ia32_loadups128_mask:
14878 case X86::BI__builtin_ia32_loadups256_mask:
14879 case X86::BI__builtin_ia32_loadups512_mask:
14880 case X86::BI__builtin_ia32_loadupd128_mask:
14881 case X86::BI__builtin_ia32_loadupd256_mask:
14882 case X86::BI__builtin_ia32_loadupd512_mask:
14883 case X86::BI__builtin_ia32_loaddquqi128_mask:
14884 case X86::BI__builtin_ia32_loaddquqi256_mask:
14885 case X86::BI__builtin_ia32_loaddquqi512_mask:
14886 case X86::BI__builtin_ia32_loaddquhi128_mask:
14887 case X86::BI__builtin_ia32_loaddquhi256_mask:
14888 case X86::BI__builtin_ia32_loaddquhi512_mask:
14889 case X86::BI__builtin_ia32_loaddqusi128_mask:
14890 case X86::BI__builtin_ia32_loaddqusi256_mask:
14891 case X86::BI__builtin_ia32_loaddqusi512_mask:
14892 case X86::BI__builtin_ia32_loaddqudi128_mask:
14893 case X86::BI__builtin_ia32_loaddqudi256_mask:
14894 case X86::BI__builtin_ia32_loaddqudi512_mask:
14895 return EmitX86MaskedLoad(*this, Ops, Align(1));
14896
14897 case X86::BI__builtin_ia32_loadsh128_mask:
14898 case X86::BI__builtin_ia32_loadss128_mask:
14899 case X86::BI__builtin_ia32_loadsd128_mask:
14900 return EmitX86MaskedLoad(*this, Ops, Align(1));
14901
14902 case X86::BI__builtin_ia32_loadaps128_mask:
14903 case X86::BI__builtin_ia32_loadaps256_mask:
14904 case X86::BI__builtin_ia32_loadaps512_mask:
14905 case X86::BI__builtin_ia32_loadapd128_mask:
14906 case X86::BI__builtin_ia32_loadapd256_mask:
14907 case X86::BI__builtin_ia32_loadapd512_mask:
14908 case X86::BI__builtin_ia32_movdqa32load128_mask:
14909 case X86::BI__builtin_ia32_movdqa32load256_mask:
14910 case X86::BI__builtin_ia32_movdqa32load512_mask:
14911 case X86::BI__builtin_ia32_movdqa64load128_mask:
14912 case X86::BI__builtin_ia32_movdqa64load256_mask:
14913 case X86::BI__builtin_ia32_movdqa64load512_mask:
14914 return EmitX86MaskedLoad(
14915 *this, Ops,
14916 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14917
14918 case X86::BI__builtin_ia32_expandloaddf128_mask:
14919 case X86::BI__builtin_ia32_expandloaddf256_mask:
14920 case X86::BI__builtin_ia32_expandloaddf512_mask:
14921 case X86::BI__builtin_ia32_expandloadsf128_mask:
14922 case X86::BI__builtin_ia32_expandloadsf256_mask:
14923 case X86::BI__builtin_ia32_expandloadsf512_mask:
14924 case X86::BI__builtin_ia32_expandloaddi128_mask:
14925 case X86::BI__builtin_ia32_expandloaddi256_mask:
14926 case X86::BI__builtin_ia32_expandloaddi512_mask:
14927 case X86::BI__builtin_ia32_expandloadsi128_mask:
14928 case X86::BI__builtin_ia32_expandloadsi256_mask:
14929 case X86::BI__builtin_ia32_expandloadsi512_mask:
14930 case X86::BI__builtin_ia32_expandloadhi128_mask:
14931 case X86::BI__builtin_ia32_expandloadhi256_mask:
14932 case X86::BI__builtin_ia32_expandloadhi512_mask:
14933 case X86::BI__builtin_ia32_expandloadqi128_mask:
14934 case X86::BI__builtin_ia32_expandloadqi256_mask:
14935 case X86::BI__builtin_ia32_expandloadqi512_mask:
14936 return EmitX86ExpandLoad(*this, Ops);
14937
14938 case X86::BI__builtin_ia32_compressstoredf128_mask:
14939 case X86::BI__builtin_ia32_compressstoredf256_mask:
14940 case X86::BI__builtin_ia32_compressstoredf512_mask:
14941 case X86::BI__builtin_ia32_compressstoresf128_mask:
14942 case X86::BI__builtin_ia32_compressstoresf256_mask:
14943 case X86::BI__builtin_ia32_compressstoresf512_mask:
14944 case X86::BI__builtin_ia32_compressstoredi128_mask:
14945 case X86::BI__builtin_ia32_compressstoredi256_mask:
14946 case X86::BI__builtin_ia32_compressstoredi512_mask:
14947 case X86::BI__builtin_ia32_compressstoresi128_mask:
14948 case X86::BI__builtin_ia32_compressstoresi256_mask:
14949 case X86::BI__builtin_ia32_compressstoresi512_mask:
14950 case X86::BI__builtin_ia32_compressstorehi128_mask:
14951 case X86::BI__builtin_ia32_compressstorehi256_mask:
14952 case X86::BI__builtin_ia32_compressstorehi512_mask:
14953 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14954 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14955 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14956 return EmitX86CompressStore(*this, Ops);
14957
14958 case X86::BI__builtin_ia32_expanddf128_mask:
14959 case X86::BI__builtin_ia32_expanddf256_mask:
14960 case X86::BI__builtin_ia32_expanddf512_mask:
14961 case X86::BI__builtin_ia32_expandsf128_mask:
14962 case X86::BI__builtin_ia32_expandsf256_mask:
14963 case X86::BI__builtin_ia32_expandsf512_mask:
14964 case X86::BI__builtin_ia32_expanddi128_mask:
14965 case X86::BI__builtin_ia32_expanddi256_mask:
14966 case X86::BI__builtin_ia32_expanddi512_mask:
14967 case X86::BI__builtin_ia32_expandsi128_mask:
14968 case X86::BI__builtin_ia32_expandsi256_mask:
14969 case X86::BI__builtin_ia32_expandsi512_mask:
14970 case X86::BI__builtin_ia32_expandhi128_mask:
14971 case X86::BI__builtin_ia32_expandhi256_mask:
14972 case X86::BI__builtin_ia32_expandhi512_mask:
14973 case X86::BI__builtin_ia32_expandqi128_mask:
14974 case X86::BI__builtin_ia32_expandqi256_mask:
14975 case X86::BI__builtin_ia32_expandqi512_mask:
14976 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14977
14978 case X86::BI__builtin_ia32_compressdf128_mask:
14979 case X86::BI__builtin_ia32_compressdf256_mask:
14980 case X86::BI__builtin_ia32_compressdf512_mask:
14981 case X86::BI__builtin_ia32_compresssf128_mask:
14982 case X86::BI__builtin_ia32_compresssf256_mask:
14983 case X86::BI__builtin_ia32_compresssf512_mask:
14984 case X86::BI__builtin_ia32_compressdi128_mask:
14985 case X86::BI__builtin_ia32_compressdi256_mask:
14986 case X86::BI__builtin_ia32_compressdi512_mask:
14987 case X86::BI__builtin_ia32_compresssi128_mask:
14988 case X86::BI__builtin_ia32_compresssi256_mask:
14989 case X86::BI__builtin_ia32_compresssi512_mask:
14990 case X86::BI__builtin_ia32_compresshi128_mask:
14991 case X86::BI__builtin_ia32_compresshi256_mask:
14992 case X86::BI__builtin_ia32_compresshi512_mask:
14993 case X86::BI__builtin_ia32_compressqi128_mask:
14994 case X86::BI__builtin_ia32_compressqi256_mask:
14995 case X86::BI__builtin_ia32_compressqi512_mask:
14996 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14997
14998 case X86::BI__builtin_ia32_gather3div2df:
14999 case X86::BI__builtin_ia32_gather3div2di:
15000 case X86::BI__builtin_ia32_gather3div4df:
15001 case X86::BI__builtin_ia32_gather3div4di:
15002 case X86::BI__builtin_ia32_gather3div4sf:
15003 case X86::BI__builtin_ia32_gather3div4si:
15004 case X86::BI__builtin_ia32_gather3div8sf:
15005 case X86::BI__builtin_ia32_gather3div8si:
15006 case X86::BI__builtin_ia32_gather3siv2df:
15007 case X86::BI__builtin_ia32_gather3siv2di:
15008 case X86::BI__builtin_ia32_gather3siv4df:
15009 case X86::BI__builtin_ia32_gather3siv4di:
15010 case X86::BI__builtin_ia32_gather3siv4sf:
15011 case X86::BI__builtin_ia32_gather3siv4si:
15012 case X86::BI__builtin_ia32_gather3siv8sf:
15013 case X86::BI__builtin_ia32_gather3siv8si:
15014 case X86::BI__builtin_ia32_gathersiv8df:
15015 case X86::BI__builtin_ia32_gathersiv16sf:
15016 case X86::BI__builtin_ia32_gatherdiv8df:
15017 case X86::BI__builtin_ia32_gatherdiv16sf:
15018 case X86::BI__builtin_ia32_gathersiv8di:
15019 case X86::BI__builtin_ia32_gathersiv16si:
15020 case X86::BI__builtin_ia32_gatherdiv8di:
15021 case X86::BI__builtin_ia32_gatherdiv16si: {
15022 Intrinsic::ID IID;
15023 switch (BuiltinID) {
15024 default: llvm_unreachable("Unexpected builtin");
15025 case X86::BI__builtin_ia32_gather3div2df:
15026 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15027 break;
15028 case X86::BI__builtin_ia32_gather3div2di:
15029 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15030 break;
15031 case X86::BI__builtin_ia32_gather3div4df:
15032 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15033 break;
15034 case X86::BI__builtin_ia32_gather3div4di:
15035 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15036 break;
15037 case X86::BI__builtin_ia32_gather3div4sf:
15038 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15039 break;
15040 case X86::BI__builtin_ia32_gather3div4si:
15041 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15042 break;
15043 case X86::BI__builtin_ia32_gather3div8sf:
15044 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15045 break;
15046 case X86::BI__builtin_ia32_gather3div8si:
15047 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15048 break;
15049 case X86::BI__builtin_ia32_gather3siv2df:
15050 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15051 break;
15052 case X86::BI__builtin_ia32_gather3siv2di:
15053 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15054 break;
15055 case X86::BI__builtin_ia32_gather3siv4df:
15056 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15057 break;
15058 case X86::BI__builtin_ia32_gather3siv4di:
15059 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15060 break;
15061 case X86::BI__builtin_ia32_gather3siv4sf:
15062 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15063 break;
15064 case X86::BI__builtin_ia32_gather3siv4si:
15065 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15066 break;
15067 case X86::BI__builtin_ia32_gather3siv8sf:
15068 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15069 break;
15070 case X86::BI__builtin_ia32_gather3siv8si:
15071 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15072 break;
15073 case X86::BI__builtin_ia32_gathersiv8df:
15074 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15075 break;
15076 case X86::BI__builtin_ia32_gathersiv16sf:
15077 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15078 break;
15079 case X86::BI__builtin_ia32_gatherdiv8df:
15080 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15081 break;
15082 case X86::BI__builtin_ia32_gatherdiv16sf:
15083 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15084 break;
15085 case X86::BI__builtin_ia32_gathersiv8di:
15086 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15087 break;
15088 case X86::BI__builtin_ia32_gathersiv16si:
15089 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15090 break;
15091 case X86::BI__builtin_ia32_gatherdiv8di:
15092 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15093 break;
15094 case X86::BI__builtin_ia32_gatherdiv16si:
15095 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15096 break;
15097 }
15098
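// The number of elements actually gathered is the smaller of the source and
// index vector widths, so the mask argument is narrowed to that many bits.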
15099 unsigned MinElts = std::min(
15100 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15101 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15102 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15103 Function *Intr = CGM.getIntrinsic(IID);
15104 return Builder.CreateCall(Intr, Ops);
15105 }
15106
15107 case X86::BI__builtin_ia32_scattersiv8df:
15108 case X86::BI__builtin_ia32_scattersiv16sf:
15109 case X86::BI__builtin_ia32_scatterdiv8df:
15110 case X86::BI__builtin_ia32_scatterdiv16sf:
15111 case X86::BI__builtin_ia32_scattersiv8di:
15112 case X86::BI__builtin_ia32_scattersiv16si:
15113 case X86::BI__builtin_ia32_scatterdiv8di:
15114 case X86::BI__builtin_ia32_scatterdiv16si:
15115 case X86::BI__builtin_ia32_scatterdiv2df:
15116 case X86::BI__builtin_ia32_scatterdiv2di:
15117 case X86::BI__builtin_ia32_scatterdiv4df:
15118 case X86::BI__builtin_ia32_scatterdiv4di:
15119 case X86::BI__builtin_ia32_scatterdiv4sf:
15120 case X86::BI__builtin_ia32_scatterdiv4si:
15121 case X86::BI__builtin_ia32_scatterdiv8sf:
15122 case X86::BI__builtin_ia32_scatterdiv8si:
15123 case X86::BI__builtin_ia32_scattersiv2df:
15124 case X86::BI__builtin_ia32_scattersiv2di:
15125 case X86::BI__builtin_ia32_scattersiv4df:
15126 case X86::BI__builtin_ia32_scattersiv4di:
15127 case X86::BI__builtin_ia32_scattersiv4sf:
15128 case X86::BI__builtin_ia32_scattersiv4si:
15129 case X86::BI__builtin_ia32_scattersiv8sf:
15130 case X86::BI__builtin_ia32_scattersiv8si: {
15131 Intrinsic::ID IID;
15132 switch (BuiltinID) {
15133 default: llvm_unreachable("Unexpected builtin");
15134 case X86::BI__builtin_ia32_scattersiv8df:
15135 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15136 break;
15137 case X86::BI__builtin_ia32_scattersiv16sf:
15138 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15139 break;
15140 case X86::BI__builtin_ia32_scatterdiv8df:
15141 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15142 break;
15143 case X86::BI__builtin_ia32_scatterdiv16sf:
15144 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15145 break;
15146 case X86::BI__builtin_ia32_scattersiv8di:
15147 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15148 break;
15149 case X86::BI__builtin_ia32_scattersiv16si:
15150 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15151 break;
15152 case X86::BI__builtin_ia32_scatterdiv8di:
15153 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15154 break;
15155 case X86::BI__builtin_ia32_scatterdiv16si:
15156 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15157 break;
15158 case X86::BI__builtin_ia32_scatterdiv2df:
15159 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15160 break;
15161 case X86::BI__builtin_ia32_scatterdiv2di:
15162 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15163 break;
15164 case X86::BI__builtin_ia32_scatterdiv4df:
15165 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15166 break;
15167 case X86::BI__builtin_ia32_scatterdiv4di:
15168 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15169 break;
15170 case X86::BI__builtin_ia32_scatterdiv4sf:
15171 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15172 break;
15173 case X86::BI__builtin_ia32_scatterdiv4si:
15174 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15175 break;
15176 case X86::BI__builtin_ia32_scatterdiv8sf:
15177 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15178 break;
15179 case X86::BI__builtin_ia32_scatterdiv8si:
15180 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15181 break;
15182 case X86::BI__builtin_ia32_scattersiv2df:
15183 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15184 break;
15185 case X86::BI__builtin_ia32_scattersiv2di:
15186 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15187 break;
15188 case X86::BI__builtin_ia32_scattersiv4df:
15189 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15190 break;
15191 case X86::BI__builtin_ia32_scattersiv4di:
15192 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15193 break;
15194 case X86::BI__builtin_ia32_scattersiv4sf:
15195 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15196 break;
15197 case X86::BI__builtin_ia32_scattersiv4si:
15198 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15199 break;
15200 case X86::BI__builtin_ia32_scattersiv8sf:
15201 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15202 break;
15203 case X86::BI__builtin_ia32_scattersiv8si:
15204 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15205 break;
15206 }
15207
15208 unsigned MinElts = std::min(
15209 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15210 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15211 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15212 Function *Intr = CGM.getIntrinsic(IID);
15213 return Builder.CreateCall(Intr, Ops);
15214 }
15215
15216 case X86::BI__builtin_ia32_vextractf128_pd256:
15217 case X86::BI__builtin_ia32_vextractf128_ps256:
15218 case X86::BI__builtin_ia32_vextractf128_si256:
15219 case X86::BI__builtin_ia32_extract128i256:
15220 case X86::BI__builtin_ia32_extractf64x4_mask:
15221 case X86::BI__builtin_ia32_extractf32x4_mask:
15222 case X86::BI__builtin_ia32_extracti64x4_mask:
15223 case X86::BI__builtin_ia32_extracti32x4_mask:
15224 case X86::BI__builtin_ia32_extractf32x8_mask:
15225 case X86::BI__builtin_ia32_extracti32x8_mask:
15226 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15227 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15228 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15229 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15230 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15231 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15232 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15233 unsigned NumElts = DstTy->getNumElements();
15234 unsigned SrcNumElts =
15235 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15236 unsigned SubVectors = SrcNumElts / NumElts;
15237 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15238 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15239 Index &= SubVectors - 1; // Remove any extra bits.
15240 Index *= NumElts;
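// For example, extractf64x4_mask with Index == 1 selects elements 4-7 of the
// 8-element source.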
15241
15242 int Indices[16];
15243 for (unsigned i = 0; i != NumElts; ++i)
15244 Indices[i] = i + Index;
15245
15246 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15247 "extract");
15248
15249 if (Ops.size() == 4)
15250 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15251
15252 return Res;
15253 }
15254 case X86::BI__builtin_ia32_vinsertf128_pd256:
15255 case X86::BI__builtin_ia32_vinsertf128_ps256:
15256 case X86::BI__builtin_ia32_vinsertf128_si256:
15257 case X86::BI__builtin_ia32_insert128i256:
15258 case X86::BI__builtin_ia32_insertf64x4:
15259 case X86::BI__builtin_ia32_insertf32x4:
15260 case X86::BI__builtin_ia32_inserti64x4:
15261 case X86::BI__builtin_ia32_inserti32x4:
15262 case X86::BI__builtin_ia32_insertf32x8:
15263 case X86::BI__builtin_ia32_inserti32x8:
15264 case X86::BI__builtin_ia32_insertf32x4_256:
15265 case X86::BI__builtin_ia32_inserti32x4_256:
15266 case X86::BI__builtin_ia32_insertf64x2_256:
15267 case X86::BI__builtin_ia32_inserti64x2_256:
15268 case X86::BI__builtin_ia32_insertf64x2_512:
15269 case X86::BI__builtin_ia32_inserti64x2_512: {
15270 unsigned DstNumElts =
15271 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15272 unsigned SrcNumElts =
15273 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15274 unsigned SubVectors = DstNumElts / SrcNumElts;
15275 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15276 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15277 Index &= SubVectors - 1; // Remove any extra bits.
15278 Index *= SrcNumElts;
15279
15280 int Indices[16];
15281 for (unsigned i = 0; i != DstNumElts; ++i)
15282 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15283
15284 Value *Op1 = Builder.CreateShuffleVector(
15285 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
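// Op1 now holds Ops[1] in its low SrcNumElts elements (the remaining elements
// are undefined); the second shuffle below places those elements at
// [Index, Index + SrcNumElts) within Ops[0].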
15286
15287 for (unsigned i = 0; i != DstNumElts; ++i) {
15288 if (i >= Index && i < (Index + SrcNumElts))
15289 Indices[i] = (i - Index) + DstNumElts;
15290 else
15291 Indices[i] = i;
15292 }
15293
15294 return Builder.CreateShuffleVector(Ops[0], Op1,
15295 ArrayRef(Indices, DstNumElts), "insert");
15296 }
15297 case X86::BI__builtin_ia32_pmovqd512_mask:
15298 case X86::BI__builtin_ia32_pmovwb512_mask: {
15299 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15300 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15301 }
15302 case X86::BI__builtin_ia32_pmovdb512_mask:
15303 case X86::BI__builtin_ia32_pmovdw512_mask:
15304 case X86::BI__builtin_ia32_pmovqw512_mask: {
15305 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15306 if (C->isAllOnesValue())
15307 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15308
15309 Intrinsic::ID IID;
15310 switch (BuiltinID) {
15311 default: llvm_unreachable("Unsupported intrinsic!");
15312 case X86::BI__builtin_ia32_pmovdb512_mask:
15313 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15314 break;
15315 case X86::BI__builtin_ia32_pmovdw512_mask:
15316 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15317 break;
15318 case X86::BI__builtin_ia32_pmovqw512_mask:
15319 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15320 break;
15321 }
15322
15323 Function *Intr = CGM.getIntrinsic(IID);
15324 return Builder.CreateCall(Intr, Ops);
15325 }
15326 case X86::BI__builtin_ia32_pblendw128:
15327 case X86::BI__builtin_ia32_blendpd:
15328 case X86::BI__builtin_ia32_blendps:
15329 case X86::BI__builtin_ia32_blendpd256:
15330 case X86::BI__builtin_ia32_blendps256:
15331 case X86::BI__builtin_ia32_pblendw256:
15332 case X86::BI__builtin_ia32_pblendd128:
15333 case X86::BI__builtin_ia32_pblendd256: {
15334 unsigned NumElts =
15335 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15336 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15337
15338 int Indices[16];
15339 // If there are more than 8 elements, the immediate is used twice so make
15340 // sure we handle that case.
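// For example, pblendw128 with Imm == 0x0F takes elements 0-3 from Ops[1]
// and elements 4-7 from Ops[0].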
15341 for (unsigned i = 0; i != NumElts; ++i)
15342 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15343
15344 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15345 ArrayRef(Indices, NumElts), "blend");
15346 }
15347 case X86::BI__builtin_ia32_pshuflw:
15348 case X86::BI__builtin_ia32_pshuflw256:
15349 case X86::BI__builtin_ia32_pshuflw512: {
15350 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15351 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15352 unsigned NumElts = Ty->getNumElements();
15353
15354 // Splat the low 8 bits of the immediate 4 times to help the loop wrap around.
15355 Imm = (Imm & 0xff) * 0x01010101;
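// Multiplying by 0x01010101 copies the low byte into all four bytes, so the
// 'Imm & 3' / 'Imm >>= 2' sequence below yields the same four 2-bit fields
// again for each subsequent 8-element lane.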
15356
15357 int Indices[32];
15358 for (unsigned l = 0; l != NumElts; l += 8) {
15359 for (unsigned i = 0; i != 4; ++i) {
15360 Indices[l + i] = l + (Imm & 3);
15361 Imm >>= 2;
15362 }
15363 for (unsigned i = 4; i != 8; ++i)
15364 Indices[l + i] = l + i;
15365 }
15366
15367 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15368 "pshuflw");
15369 }
15370 case X86::BI__builtin_ia32_pshufhw:
15371 case X86::BI__builtin_ia32_pshufhw256:
15372 case X86::BI__builtin_ia32_pshufhw512: {
15373 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15374 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15375 unsigned NumElts = Ty->getNumElements();
15376
15377 // Splat the low 8 bits of the immediate 4 times to help the loop wrap around.
15378 Imm = (Imm & 0xff) * 0x01010101;
15379
15380 int Indices[32];
15381 for (unsigned l = 0; l != NumElts; l += 8) {
15382 for (unsigned i = 0; i != 4; ++i)
15383 Indices[l + i] = l + i;
15384 for (unsigned i = 4; i != 8; ++i) {
15385 Indices[l + i] = l + 4 + (Imm & 3);
15386 Imm >>= 2;
15387 }
15388 }
15389
15390 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15391 "pshufhw");
15392 }
15393 case X86::BI__builtin_ia32_pshufd:
15394 case X86::BI__builtin_ia32_pshufd256:
15395 case X86::BI__builtin_ia32_pshufd512:
15396 case X86::BI__builtin_ia32_vpermilpd:
15397 case X86::BI__builtin_ia32_vpermilps:
15398 case X86::BI__builtin_ia32_vpermilpd256:
15399 case X86::BI__builtin_ia32_vpermilps256:
15400 case X86::BI__builtin_ia32_vpermilpd512:
15401 case X86::BI__builtin_ia32_vpermilps512: {
15402 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15403 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15404 unsigned NumElts = Ty->getNumElements();
15405 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15406 unsigned NumLaneElts = NumElts / NumLanes;
15407
15408 // Splat the low 8 bits of the immediate 4 times to help the loop wrap around.
15409 Imm = (Imm & 0xff) * 0x01010101;
15410
15411 int Indices[16];
15412 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15413 for (unsigned i = 0; i != NumLaneElts; ++i) {
15414 Indices[i + l] = (Imm % NumLaneElts) + l;
15415 Imm /= NumLaneElts;
15416 }
15417 }
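// For example, pshufd with Imm == 0x1B (_MM_SHUFFLE(0, 1, 2, 3)) produces
// indices 3,2,1,0 in each 128-bit lane, reversing the four 32-bit elements.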
15418
15419 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15420 "permil");
15421 }
15422 case X86::BI__builtin_ia32_shufpd:
15423 case X86::BI__builtin_ia32_shufpd256:
15424 case X86::BI__builtin_ia32_shufpd512:
15425 case X86::BI__builtin_ia32_shufps:
15426 case X86::BI__builtin_ia32_shufps256:
15427 case X86::BI__builtin_ia32_shufps512: {
15428 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15429 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15430 unsigned NumElts = Ty->getNumElements();
15431 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15432 unsigned NumLaneElts = NumElts / NumLanes;
15433
15434 // Splat the low 8 bits of the immediate 4 times to help the loop wrap around.
15435 Imm = (Imm & 0xff) * 0x01010101;
15436
15437 int Indices[16];
15438 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15439 for (unsigned i = 0; i != NumLaneElts; ++i) {
15440 unsigned Index = Imm % NumLaneElts;
15441 Imm /= NumLaneElts;
15442 if (i >= (NumLaneElts / 2))
15443 Index += NumElts;
15444 Indices[l + i] = l + Index;
15445 }
15446 }
15447
15448 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15449 ArrayRef(Indices, NumElts), "shufp");
15450 }
15451 case X86::BI__builtin_ia32_permdi256:
15452 case X86::BI__builtin_ia32_permdf256:
15453 case X86::BI__builtin_ia32_permdi512:
15454 case X86::BI__builtin_ia32_permdf512: {
15455 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15456 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15457 unsigned NumElts = Ty->getNumElements();
15458
15459 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15460 int Indices[8];
15461 for (unsigned l = 0; l != NumElts; l += 4)
15462 for (unsigned i = 0; i != 4; ++i)
15463 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15464
15465 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15466 "perm");
15467 }
15468 case X86::BI__builtin_ia32_palignr128:
15469 case X86::BI__builtin_ia32_palignr256:
15470 case X86::BI__builtin_ia32_palignr512: {
15471 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15472
15473 unsigned NumElts =
15474 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15475 assert(NumElts % 16 == 0);
15476
15477 // If palignr is shifting the pair of vectors more than the size of two
15478 // lanes, emit zero.
15479 if (ShiftVal >= 32)
15480 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15481
15482 // If palignr is shifting the pair of input vectors more than one lane,
15483 // but less than two lanes, convert to shifting in zeroes.
15484 if (ShiftVal > 16) {
15485 ShiftVal -= 16;
15486 Ops[1] = Ops[0];
15487 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15488 }
15489
15490 int Indices[64];
15491 // 256-bit palignr operates on 128-bit lanes so we need to handle that
15492 for (unsigned l = 0; l != NumElts; l += 16) {
15493 for (unsigned i = 0; i != 16; ++i) {
15494 unsigned Idx = ShiftVal + i;
15495 if (Idx >= 16)
15496 Idx += NumElts - 16; // End of lane, switch operand.
15497 Indices[l + i] = Idx + l;
15498 }
15499 }
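// For example, with ShiftVal == 4 each 16-byte lane of the result is bytes
// 4-15 of Ops[1] followed by bytes 0-3 of Ops[0].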
15500
15501 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15502 ArrayRef(Indices, NumElts), "palignr");
15503 }
15504 case X86::BI__builtin_ia32_alignd128:
15505 case X86::BI__builtin_ia32_alignd256:
15506 case X86::BI__builtin_ia32_alignd512:
15507 case X86::BI__builtin_ia32_alignq128:
15508 case X86::BI__builtin_ia32_alignq256:
15509 case X86::BI__builtin_ia32_alignq512: {
15510 unsigned NumElts =
15511 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15512 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15513
15514 // Mask the shift amount to width of a vector.
15515 ShiftVal &= NumElts - 1;
15516
15517 int Indices[16];
15518 for (unsigned i = 0; i != NumElts; ++i)
15519 Indices[i] = i + ShiftVal;
15520
15521 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15522 ArrayRef(Indices, NumElts), "valign");
15523 }
15524 case X86::BI__builtin_ia32_shuf_f32x4_256:
15525 case X86::BI__builtin_ia32_shuf_f64x2_256:
15526 case X86::BI__builtin_ia32_shuf_i32x4_256:
15527 case X86::BI__builtin_ia32_shuf_i64x2_256:
15528 case X86::BI__builtin_ia32_shuf_f32x4:
15529 case X86::BI__builtin_ia32_shuf_f64x2:
15530 case X86::BI__builtin_ia32_shuf_i32x4:
15531 case X86::BI__builtin_ia32_shuf_i64x2: {
15532 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15533 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15534 unsigned NumElts = Ty->getNumElements();
15535 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15536 unsigned NumLaneElts = NumElts / NumLanes;
15537
15538 int Indices[16];
15539 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15540 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15541 Imm /= NumLanes; // Discard the bits we just used.
15542 if (l >= (NumElts / 2))
15543 Index += NumElts; // Switch to other source.
15544 for (unsigned i = 0; i != NumLaneElts; ++i) {
15545 Indices[l + i] = Index + i;
15546 }
15547 }
15548
15549 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15550 ArrayRef(Indices, NumElts), "shuf");
15551 }
15552
15553 case X86::BI__builtin_ia32_vperm2f128_pd256:
15554 case X86::BI__builtin_ia32_vperm2f128_ps256:
15555 case X86::BI__builtin_ia32_vperm2f128_si256:
15556 case X86::BI__builtin_ia32_permti256: {
15557 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15558 unsigned NumElts =
15559 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15560
15561 // This takes a very simple approach since there are two lanes and a
15562 // shuffle can have 2 inputs. So we reserve the first input for the first
15563 // lane and the second input for the second lane. This may result in
15564 // duplicate sources, but this can be dealt with in the backend.
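// Each 4-bit half of Imm controls one result lane: bit 3 zeroes the lane,
// bit 1 selects Ops[1] over Ops[0], and bit 0 picks the high 128-bit half of
// the chosen source.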
15565
15566 Value *OutOps[2];
15567 int Indices[8];
15568 for (unsigned l = 0; l != 2; ++l) {
15569 // Determine the source for this lane.
15570 if (Imm & (1 << ((l * 4) + 3)))
15571 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15572 else if (Imm & (1 << ((l * 4) + 1)))
15573 OutOps[l] = Ops[1];
15574 else
15575 OutOps[l] = Ops[0];
15576
15577 for (unsigned i = 0; i != NumElts/2; ++i) {
15578 // Start with the ith element of the source for this lane.
15579 unsigned Idx = (l * NumElts) + i;
15580 // If bit 0 of the immediate half is set, switch to the high half of
15581 // the source.
15582 if (Imm & (1 << (l * 4)))
15583 Idx += NumElts/2;
15584 Indices[(l * (NumElts/2)) + i] = Idx;
15585 }
15586 }
15587
15588 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15589 ArrayRef(Indices, NumElts), "vperm");
15590 }
15591
15592 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15593 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15594 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15595 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15596 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15597 // Builtin type is vXi64 so multiply by 8 to get bytes.
15598 unsigned NumElts = ResultType->getNumElements() * 8;
15599
15600 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15601 if (ShiftVal >= 16)
15602 return llvm::Constant::getNullValue(ResultType);
15603
15604 int Indices[64];
15605 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
15606 for (unsigned l = 0; l != NumElts; l += 16) {
15607 for (unsigned i = 0; i != 16; ++i) {
15608 unsigned Idx = NumElts + i - ShiftVal;
15609 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15610 Indices[l + i] = Idx + l;
15611 }
15612 }
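// For example, pslldqi128 with ShiftVal == 3 zeroes bytes 0-2 and makes byte
// i (i >= 3) equal to source byte i-3, i.e. a left shift by 3 bytes.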
15613
15614 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15615 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15616 Value *Zero = llvm::Constant::getNullValue(VecTy);
15617 Value *SV = Builder.CreateShuffleVector(
15618 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15619 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15620 }
15621 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15622 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15623 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15624 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15625 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15626 // Builtin type is vXi64 so multiply by 8 to get bytes.
15627 unsigned NumElts = ResultType->getNumElements() * 8;
15628
15629 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15630 if (ShiftVal >= 16)
15631 return llvm::Constant::getNullValue(ResultType);
15632
15633 int Indices[64];
15634 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
15635 for (unsigned l = 0; l != NumElts; l += 16) {
15636 for (unsigned i = 0; i != 16; ++i) {
15637 unsigned Idx = i + ShiftVal;
15638 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15639 Indices[l + i] = Idx + l;
15640 }
15641 }
15642
15643 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15644 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15645 Value *Zero = llvm::Constant::getNullValue(VecTy);
15646 Value *SV = Builder.CreateShuffleVector(
15647 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15648 return Builder.CreateBitCast(SV, ResultType, "cast");
15649 }
15650 case X86::BI__builtin_ia32_kshiftliqi:
15651 case X86::BI__builtin_ia32_kshiftlihi:
15652 case X86::BI__builtin_ia32_kshiftlisi:
15653 case X86::BI__builtin_ia32_kshiftlidi: {
15654 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15655 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15656
15657 if (ShiftVal >= NumElts)
15658 return llvm::Constant::getNullValue(Ops[0]->getType());
15659
15660 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15661
15662 int Indices[64];
15663 for (unsigned i = 0; i != NumElts; ++i)
15664 Indices[i] = NumElts + i - ShiftVal;
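// Shuffling {Zero, In} with these indices makes result bit i equal to
// In[i - ShiftVal] for i >= ShiftVal and 0 otherwise, i.e. a left shift of
// the mask.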
15665
15666 Value *Zero = llvm::Constant::getNullValue(In->getType());
15667 Value *SV = Builder.CreateShuffleVector(
15668 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15669 return Builder.CreateBitCast(SV, Ops[0]->getType());
15670 }
15671 case X86::BI__builtin_ia32_kshiftriqi:
15672 case X86::BI__builtin_ia32_kshiftrihi:
15673 case X86::BI__builtin_ia32_kshiftrisi:
15674 case X86::BI__builtin_ia32_kshiftridi: {
15675 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15676 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15677
15678 if (ShiftVal >= NumElts)
15679 return llvm::Constant::getNullValue(Ops[0]->getType());
15680
15681 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15682
15683 int Indices[64];
15684 for (unsigned i = 0; i != NumElts; ++i)
15685 Indices[i] = i + ShiftVal;
15686
15687 Value *Zero = llvm::Constant::getNullValue(In->getType());
15688 Value *SV = Builder.CreateShuffleVector(
15689 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15690 return Builder.CreateBitCast(SV, Ops[0]->getType());
15691 }
15692 case X86::BI__builtin_ia32_movnti:
15693 case X86::BI__builtin_ia32_movnti64:
15694 case X86::BI__builtin_ia32_movntsd:
15695 case X86::BI__builtin_ia32_movntss: {
15696 llvm::MDNode *Node = llvm::MDNode::get(
15697 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15698
15699 Value *Ptr = Ops[0];
15700 Value *Src = Ops[1];
15701
15702 // Extract the 0'th element of the source vector.
15703 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15704 BuiltinID == X86::BI__builtin_ia32_movntss)
15705 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15706
15707 // Unaligned nontemporal store of the scalar value.
15708 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15709 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15710 SI->setAlignment(llvm::Align(1));
15711 return SI;
15712 }
15713 // Rotate is a special case of funnel shift - the first two args are the same.
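// e.g. rotl(X, C) == fshl(X, X, C): shift the double-width value X:X left by
// C and keep the high half.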
15714 case X86::BI__builtin_ia32_vprotb:
15715 case X86::BI__builtin_ia32_vprotw:
15716 case X86::BI__builtin_ia32_vprotd:
15717 case X86::BI__builtin_ia32_vprotq:
15718 case X86::BI__builtin_ia32_vprotbi:
15719 case X86::BI__builtin_ia32_vprotwi:
15720 case X86::BI__builtin_ia32_vprotdi:
15721 case X86::BI__builtin_ia32_vprotqi:
15722 case X86::BI__builtin_ia32_prold128:
15723 case X86::BI__builtin_ia32_prold256:
15724 case X86::BI__builtin_ia32_prold512:
15725 case X86::BI__builtin_ia32_prolq128:
15726 case X86::BI__builtin_ia32_prolq256:
15727 case X86::BI__builtin_ia32_prolq512:
15728 case X86::BI__builtin_ia32_prolvd128:
15729 case X86::BI__builtin_ia32_prolvd256:
15730 case X86::BI__builtin_ia32_prolvd512:
15731 case X86::BI__builtin_ia32_prolvq128:
15732 case X86::BI__builtin_ia32_prolvq256:
15733 case X86::BI__builtin_ia32_prolvq512:
15734 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15735 case X86::BI__builtin_ia32_prord128:
15736 case X86::BI__builtin_ia32_prord256:
15737 case X86::BI__builtin_ia32_prord512:
15738 case X86::BI__builtin_ia32_prorq128:
15739 case X86::BI__builtin_ia32_prorq256:
15740 case X86::BI__builtin_ia32_prorq512:
15741 case X86::BI__builtin_ia32_prorvd128:
15742 case X86::BI__builtin_ia32_prorvd256:
15743 case X86::BI__builtin_ia32_prorvd512:
15744 case X86::BI__builtin_ia32_prorvq128:
15745 case X86::BI__builtin_ia32_prorvq256:
15746 case X86::BI__builtin_ia32_prorvq512:
15747 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
15748 case X86::BI__builtin_ia32_selectb_128:
15749 case X86::BI__builtin_ia32_selectb_256:
15750 case X86::BI__builtin_ia32_selectb_512:
15751 case X86::BI__builtin_ia32_selectw_128:
15752 case X86::BI__builtin_ia32_selectw_256:
15753 case X86::BI__builtin_ia32_selectw_512:
15754 case X86::BI__builtin_ia32_selectd_128:
15755 case X86::BI__builtin_ia32_selectd_256:
15756 case X86::BI__builtin_ia32_selectd_512:
15757 case X86::BI__builtin_ia32_selectq_128:
15758 case X86::BI__builtin_ia32_selectq_256:
15759 case X86::BI__builtin_ia32_selectq_512:
15760 case X86::BI__builtin_ia32_selectph_128:
15761 case X86::BI__builtin_ia32_selectph_256:
15762 case X86::BI__builtin_ia32_selectph_512:
15763 case X86::BI__builtin_ia32_selectpbf_128:
15764 case X86::BI__builtin_ia32_selectpbf_256:
15765 case X86::BI__builtin_ia32_selectpbf_512:
15766 case X86::BI__builtin_ia32_selectps_128:
15767 case X86::BI__builtin_ia32_selectps_256:
15768 case X86::BI__builtin_ia32_selectps_512:
15769 case X86::BI__builtin_ia32_selectpd_128:
15770 case X86::BI__builtin_ia32_selectpd_256:
15771 case X86::BI__builtin_ia32_selectpd_512:
15772 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15773 case X86::BI__builtin_ia32_selectsh_128:
15774 case X86::BI__builtin_ia32_selectsbf_128:
15775 case X86::BI__builtin_ia32_selectss_128:
15776 case X86::BI__builtin_ia32_selectsd_128: {
15777 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15778 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15779 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15780 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15781 }
15782 case X86::BI__builtin_ia32_cmpb128_mask:
15783 case X86::BI__builtin_ia32_cmpb256_mask:
15784 case X86::BI__builtin_ia32_cmpb512_mask:
15785 case X86::BI__builtin_ia32_cmpw128_mask:
15786 case X86::BI__builtin_ia32_cmpw256_mask:
15787 case X86::BI__builtin_ia32_cmpw512_mask:
15788 case X86::BI__builtin_ia32_cmpd128_mask:
15789 case X86::BI__builtin_ia32_cmpd256_mask:
15790 case X86::BI__builtin_ia32_cmpd512_mask:
15791 case X86::BI__builtin_ia32_cmpq128_mask:
15792 case X86::BI__builtin_ia32_cmpq256_mask:
15793 case X86::BI__builtin_ia32_cmpq512_mask: {
15794 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15795 return EmitX86MaskedCompare(*this, CC, true, Ops);
15796 }
15797 case X86::BI__builtin_ia32_ucmpb128_mask:
15798 case X86::BI__builtin_ia32_ucmpb256_mask:
15799 case X86::BI__builtin_ia32_ucmpb512_mask:
15800 case X86::BI__builtin_ia32_ucmpw128_mask:
15801 case X86::BI__builtin_ia32_ucmpw256_mask:
15802 case X86::BI__builtin_ia32_ucmpw512_mask:
15803 case X86::BI__builtin_ia32_ucmpd128_mask:
15804 case X86::BI__builtin_ia32_ucmpd256_mask:
15805 case X86::BI__builtin_ia32_ucmpd512_mask:
15806 case X86::BI__builtin_ia32_ucmpq128_mask:
15807 case X86::BI__builtin_ia32_ucmpq256_mask:
15808 case X86::BI__builtin_ia32_ucmpq512_mask: {
15809 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15810 return EmitX86MaskedCompare(*this, CC, false, Ops);
15811 }
15812 case X86::BI__builtin_ia32_vpcomb:
15813 case X86::BI__builtin_ia32_vpcomw:
15814 case X86::BI__builtin_ia32_vpcomd:
15815 case X86::BI__builtin_ia32_vpcomq:
15816 return EmitX86vpcom(*this, Ops, true);
15817 case X86::BI__builtin_ia32_vpcomub:
15818 case X86::BI__builtin_ia32_vpcomuw:
15819 case X86::BI__builtin_ia32_vpcomud:
15820 case X86::BI__builtin_ia32_vpcomuq:
15821 return EmitX86vpcom(*this, Ops, false);
15822
15823 case X86::BI__builtin_ia32_kortestcqi:
15824 case X86::BI__builtin_ia32_kortestchi:
15825 case X86::BI__builtin_ia32_kortestcsi:
15826 case X86::BI__builtin_ia32_kortestcdi: {
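// KORTESTC returns 1 exactly when the OR of the two masks is all ones, so
// model it as an equality compare against an all-ones value.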
15827 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15828 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15829 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15830 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15831 }
15832 case X86::BI__builtin_ia32_kortestzqi:
15833 case X86::BI__builtin_ia32_kortestzhi:
15834 case X86::BI__builtin_ia32_kortestzsi:
15835 case X86::BI__builtin_ia32_kortestzdi: {
15836 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15837 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15838 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15839 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15840 }
15841
15842 case X86::BI__builtin_ia32_ktestcqi:
15843 case X86::BI__builtin_ia32_ktestzqi:
15844 case X86::BI__builtin_ia32_ktestchi:
15845 case X86::BI__builtin_ia32_ktestzhi:
15846 case X86::BI__builtin_ia32_ktestcsi:
15847 case X86::BI__builtin_ia32_ktestzsi:
15848 case X86::BI__builtin_ia32_ktestcdi:
15849 case X86::BI__builtin_ia32_ktestzdi: {
15850 Intrinsic::ID IID;
15851 switch (BuiltinID) {
15852 default: llvm_unreachable("Unsupported intrinsic!");
15853 case X86::BI__builtin_ia32_ktestcqi:
15854 IID = Intrinsic::x86_avx512_ktestc_b;
15855 break;
15856 case X86::BI__builtin_ia32_ktestzqi:
15857 IID = Intrinsic::x86_avx512_ktestz_b;
15858 break;
15859 case X86::BI__builtin_ia32_ktestchi:
15860 IID = Intrinsic::x86_avx512_ktestc_w;
15861 break;
15862 case X86::BI__builtin_ia32_ktestzhi:
15863 IID = Intrinsic::x86_avx512_ktestz_w;
15864 break;
15865 case X86::BI__builtin_ia32_ktestcsi:
15866 IID = Intrinsic::x86_avx512_ktestc_d;
15867 break;
15868 case X86::BI__builtin_ia32_ktestzsi:
15869 IID = Intrinsic::x86_avx512_ktestz_d;
15870 break;
15871 case X86::BI__builtin_ia32_ktestcdi:
15872 IID = Intrinsic::x86_avx512_ktestc_q;
15873 break;
15874 case X86::BI__builtin_ia32_ktestzdi:
15875 IID = Intrinsic::x86_avx512_ktestz_q;
15876 break;
15877 }
15878
15879 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15880 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15881 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15882 Function *Intr = CGM.getIntrinsic(IID);
15883 return Builder.CreateCall(Intr, {LHS, RHS});
15884 }
15885
15886 case X86::BI__builtin_ia32_kaddqi:
15887 case X86::BI__builtin_ia32_kaddhi:
15888 case X86::BI__builtin_ia32_kaddsi:
15889 case X86::BI__builtin_ia32_kadddi: {
15890 Intrinsic::ID IID;
15891 switch (BuiltinID) {
15892 default: llvm_unreachable("Unsupported intrinsic!");
15893 case X86::BI__builtin_ia32_kaddqi:
15894 IID = Intrinsic::x86_avx512_kadd_b;
15895 break;
15896 case X86::BI__builtin_ia32_kaddhi:
15897 IID = Intrinsic::x86_avx512_kadd_w;
15898 break;
15899 case X86::BI__builtin_ia32_kaddsi:
15900 IID = Intrinsic::x86_avx512_kadd_d;
15901 break;
15902 case X86::BI__builtin_ia32_kadddi:
15903 IID = Intrinsic::x86_avx512_kadd_q;
15904 break;
15905 }
15906
15907 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15908 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15909 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15910 Function *Intr = CGM.getIntrinsic(IID);
15911 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15912 return Builder.CreateBitCast(Res, Ops[0]->getType());
15913 }
15914 case X86::BI__builtin_ia32_kandqi:
15915 case X86::BI__builtin_ia32_kandhi:
15916 case X86::BI__builtin_ia32_kandsi:
15917 case X86::BI__builtin_ia32_kanddi:
15918 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15919 case X86::BI__builtin_ia32_kandnqi:
15920 case X86::BI__builtin_ia32_kandnhi:
15921 case X86::BI__builtin_ia32_kandnsi:
15922 case X86::BI__builtin_ia32_kandndi:
15923 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15924 case X86::BI__builtin_ia32_korqi:
15925 case X86::BI__builtin_ia32_korhi:
15926 case X86::BI__builtin_ia32_korsi:
15927 case X86::BI__builtin_ia32_kordi:
15928 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15929 case X86::BI__builtin_ia32_kxnorqi:
15930 case X86::BI__builtin_ia32_kxnorhi:
15931 case X86::BI__builtin_ia32_kxnorsi:
15932 case X86::BI__builtin_ia32_kxnordi:
15933 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15934 case X86::BI__builtin_ia32_kxorqi:
15935 case X86::BI__builtin_ia32_kxorhi:
15936 case X86::BI__builtin_ia32_kxorsi:
15937 case X86::BI__builtin_ia32_kxordi:
15938 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15939 case X86::BI__builtin_ia32_knotqi:
15940 case X86::BI__builtin_ia32_knothi:
15941 case X86::BI__builtin_ia32_knotsi:
15942 case X86::BI__builtin_ia32_knotdi: {
15943 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15944 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15945 return Builder.CreateBitCast(Builder.CreateNot(Res),
15946 Ops[0]->getType());
15947 }
15948 case X86::BI__builtin_ia32_kmovb:
15949 case X86::BI__builtin_ia32_kmovw:
15950 case X86::BI__builtin_ia32_kmovd:
15951 case X86::BI__builtin_ia32_kmovq: {
15952 // Bitcast to vXi1 type and then back to integer. This gets the mask
15953 // register type into the IR, but might be optimized out depending on
15954 // what's around it.
15955 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15956 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15957 return Builder.CreateBitCast(Res, Ops[0]->getType());
15958 }
15959
15960 case X86::BI__builtin_ia32_kunpckdi:
15961 case X86::BI__builtin_ia32_kunpcksi:
15962 case X86::BI__builtin_ia32_kunpckhi: {
15963 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15964 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15965 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15966 int Indices[64];
15967 for (unsigned i = 0; i != NumElts; ++i)
15968 Indices[i] = i;
15969
15970 // First extract half of each vector. This gives better codegen than
15971 // doing it in a single shuffle.
15972 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15973 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15974 // Concat the vectors.
15975 // NOTE: Operands are swapped to match the intrinsic definition.
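// The low NumElts/2 bits of the result therefore come from Ops[1] and the
// high bits from Ops[0].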
15976 Value *Res =
15977 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15978 return Builder.CreateBitCast(Res, Ops[0]->getType());
15979 }
15980
15981 case X86::BI__builtin_ia32_vplzcntd_128:
15982 case X86::BI__builtin_ia32_vplzcntd_256:
15983 case X86::BI__builtin_ia32_vplzcntd_512:
15984 case X86::BI__builtin_ia32_vplzcntq_128:
15985 case X86::BI__builtin_ia32_vplzcntq_256:
15986 case X86::BI__builtin_ia32_vplzcntq_512: {
15987 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15988 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15989 }
15990 case X86::BI__builtin_ia32_sqrtss:
15991 case X86::BI__builtin_ia32_sqrtsd: {
15992 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15993 Function *F;
15994 if (Builder.getIsFPConstrained()) {
15995 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15996 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15997 A->getType());
15998 A = Builder.CreateConstrainedFPCall(F, {A});
15999 } else {
16000 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16001 A = Builder.CreateCall(F, {A});
16002 }
16003 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16004 }
16005 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16006 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16007 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16008 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16009 // Lower to a plain sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
16010 // otherwise keep the intrinsic.
16011 if (CC != 4) {
16012 Intrinsic::ID IID;
16013
16014 switch (BuiltinID) {
16015 default:
16016 llvm_unreachable("Unsupported intrinsic!");
16017 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16018 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16019 break;
16020 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16021 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16022 break;
16023 case X86::BI__builtin_ia32_sqrtss_round_mask:
16024 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16025 break;
16026 }
16027 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16028 }
16029 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16030 Function *F;
16031 if (Builder.getIsFPConstrained()) {
16032 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16033 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16034 A->getType());
16035 A = Builder.CreateConstrainedFPCall(F, A);
16036 } else {
16037 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16038 A = Builder.CreateCall(F, A);
16039 }
16040 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16041 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16042 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16043 }
16044 case X86::BI__builtin_ia32_sqrtpd256:
16045 case X86::BI__builtin_ia32_sqrtpd:
16046 case X86::BI__builtin_ia32_sqrtps256:
16047 case X86::BI__builtin_ia32_sqrtps:
16048 case X86::BI__builtin_ia32_sqrtph256:
16049 case X86::BI__builtin_ia32_sqrtph:
16050 case X86::BI__builtin_ia32_sqrtph512:
16051 case X86::BI__builtin_ia32_sqrtps512:
16052 case X86::BI__builtin_ia32_sqrtpd512: {
16053 if (Ops.size() == 2) {
16054 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16055 // Lower to a plain sqrt only if the rounding mode is 4 (AKA CUR_DIRECTION);
16056 // otherwise keep the intrinsic.
16057 if (CC != 4) {
16058 Intrinsic::ID IID;
16059
16060 switch (BuiltinID) {
16061 default:
16062 llvm_unreachable("Unsupported intrinsic!");
16063 case X86::BI__builtin_ia32_sqrtph512:
16064 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16065 break;
16066 case X86::BI__builtin_ia32_sqrtps512:
16067 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16068 break;
16069 case X86::BI__builtin_ia32_sqrtpd512:
16070 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16071 break;
16072 }
16073 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16074 }
16075 }
16076 if (Builder.getIsFPConstrained()) {
16077 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16078 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16079 Ops[0]->getType());
16080 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16081 } else {
16082 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16083 return Builder.CreateCall(F, Ops[0]);
16084 }
16085 }
16086
16087 case X86::BI__builtin_ia32_pmuludq128:
16088 case X86::BI__builtin_ia32_pmuludq256:
16089 case X86::BI__builtin_ia32_pmuludq512:
16090 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16091
16092 case X86::BI__builtin_ia32_pmuldq128:
16093 case X86::BI__builtin_ia32_pmuldq256:
16094 case X86::BI__builtin_ia32_pmuldq512:
16095 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16096
16097 case X86::BI__builtin_ia32_pternlogd512_mask:
16098 case X86::BI__builtin_ia32_pternlogq512_mask:
16099 case X86::BI__builtin_ia32_pternlogd128_mask:
16100 case X86::BI__builtin_ia32_pternlogd256_mask:
16101 case X86::BI__builtin_ia32_pternlogq128_mask:
16102 case X86::BI__builtin_ia32_pternlogq256_mask:
16103 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16104
16105 case X86::BI__builtin_ia32_pternlogd512_maskz:
16106 case X86::BI__builtin_ia32_pternlogq512_maskz:
16107 case X86::BI__builtin_ia32_pternlogd128_maskz:
16108 case X86::BI__builtin_ia32_pternlogd256_maskz:
16109 case X86::BI__builtin_ia32_pternlogq128_maskz:
16110 case X86::BI__builtin_ia32_pternlogq256_maskz:
16111 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16112
16113 case X86::BI__builtin_ia32_vpshldd128:
16114 case X86::BI__builtin_ia32_vpshldd256:
16115 case X86::BI__builtin_ia32_vpshldd512:
16116 case X86::BI__builtin_ia32_vpshldq128:
16117 case X86::BI__builtin_ia32_vpshldq256:
16118 case X86::BI__builtin_ia32_vpshldq512:
16119 case X86::BI__builtin_ia32_vpshldw128:
16120 case X86::BI__builtin_ia32_vpshldw256:
16121 case X86::BI__builtin_ia32_vpshldw512:
16122 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16123
16124 case X86::BI__builtin_ia32_vpshrdd128:
16125 case X86::BI__builtin_ia32_vpshrdd256:
16126 case X86::BI__builtin_ia32_vpshrdd512:
16127 case X86::BI__builtin_ia32_vpshrdq128:
16128 case X86::BI__builtin_ia32_vpshrdq256:
16129 case X86::BI__builtin_ia32_vpshrdq512:
16130 case X86::BI__builtin_ia32_vpshrdw128:
16131 case X86::BI__builtin_ia32_vpshrdw256:
16132 case X86::BI__builtin_ia32_vpshrdw512:
16133 // Ops 0 and 1 are swapped.
16134 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16135
16136 case X86::BI__builtin_ia32_vpshldvd128:
16137 case X86::BI__builtin_ia32_vpshldvd256:
16138 case X86::BI__builtin_ia32_vpshldvd512:
16139 case X86::BI__builtin_ia32_vpshldvq128:
16140 case X86::BI__builtin_ia32_vpshldvq256:
16141 case X86::BI__builtin_ia32_vpshldvq512:
16142 case X86::BI__builtin_ia32_vpshldvw128:
16143 case X86::BI__builtin_ia32_vpshldvw256:
16144 case X86::BI__builtin_ia32_vpshldvw512:
16145 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16146
16147 case X86::BI__builtin_ia32_vpshrdvd128:
16148 case X86::BI__builtin_ia32_vpshrdvd256:
16149 case X86::BI__builtin_ia32_vpshrdvd512:
16150 case X86::BI__builtin_ia32_vpshrdvq128:
16151 case X86::BI__builtin_ia32_vpshrdvq256:
16152 case X86::BI__builtin_ia32_vpshrdvq512:
16153 case X86::BI__builtin_ia32_vpshrdvw128:
16154 case X86::BI__builtin_ia32_vpshrdvw256:
16155 case X86::BI__builtin_ia32_vpshrdvw512:
16156 // Ops 0 and 1 are swapped.
16157 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16158
16159 // Reductions
16160 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16161 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16162 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16163 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16164 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16165 Function *F =
16166 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16167 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
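// vector_reduce_fadd with a start value is an ordered reduction by default;
// the reassoc flag permits lowering it as a faster tree reduction.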
16168 Builder.getFastMathFlags().setAllowReassoc();
16169 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16170 }
16171 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16172 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16173 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16174 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16175 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16176 Function *F =
16177 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16178 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16179 Builder.getFastMathFlags().setAllowReassoc();
16180 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16181 }
16182 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16183 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16184 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16185 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16186 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16187 Function *F =
16188 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16189 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16190 Builder.getFastMathFlags().setNoNaNs();
16191 return Builder.CreateCall(F, {Ops[0]});
16192 }
16193 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16194 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16195 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16196 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16197 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16198 Function *F =
16199 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16200 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16201 Builder.getFastMathFlags().setNoNaNs();
16202 return Builder.CreateCall(F, {Ops[0]});
16203 }
16204
16205 case X86::BI__builtin_ia32_rdrand16_step:
16206 case X86::BI__builtin_ia32_rdrand32_step:
16207 case X86::BI__builtin_ia32_rdrand64_step:
16208 case X86::BI__builtin_ia32_rdseed16_step:
16209 case X86::BI__builtin_ia32_rdseed32_step:
16210 case X86::BI__builtin_ia32_rdseed64_step: {
16211 Intrinsic::ID ID;
16212 switch (BuiltinID) {
16213 default: llvm_unreachable("Unsupported intrinsic!");
16214 case X86::BI__builtin_ia32_rdrand16_step:
16215 ID = Intrinsic::x86_rdrand_16;
16216 break;
16217 case X86::BI__builtin_ia32_rdrand32_step:
16218 ID = Intrinsic::x86_rdrand_32;
16219 break;
16220 case X86::BI__builtin_ia32_rdrand64_step:
16221 ID = Intrinsic::x86_rdrand_64;
16222 break;
16223 case X86::BI__builtin_ia32_rdseed16_step:
16224 ID = Intrinsic::x86_rdseed_16;
16225 break;
16226 case X86::BI__builtin_ia32_rdseed32_step:
16227 ID = Intrinsic::x86_rdseed_32;
16228 break;
16229 case X86::BI__builtin_ia32_rdseed64_step:
16230 ID = Intrinsic::x86_rdseed_64;
16231 break;
16232 }
16233
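// The intrinsic returns {random value, success flag}; store the value
// through Ops[0] and return the flag.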
16234 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16235 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16236 Ops[0]);
16237 return Builder.CreateExtractValue(Call, 1);
16238 }
16239 case X86::BI__builtin_ia32_addcarryx_u32:
16240 case X86::BI__builtin_ia32_addcarryx_u64:
16241 case X86::BI__builtin_ia32_subborrow_u32:
16242 case X86::BI__builtin_ia32_subborrow_u64: {
16243 Intrinsic::ID IID;
16244 switch (BuiltinID) {
16245 default: llvm_unreachable("Unsupported intrinsic!");
16246 case X86::BI__builtin_ia32_addcarryx_u32:
16247 IID = Intrinsic::x86_addcarry_32;
16248 break;
16249 case X86::BI__builtin_ia32_addcarryx_u64:
16250 IID = Intrinsic::x86_addcarry_64;
16251 break;
16252 case X86::BI__builtin_ia32_subborrow_u32:
16253 IID = Intrinsic::x86_subborrow_32;
16254 break;
16255 case X86::BI__builtin_ia32_subborrow_u64:
16256 IID = Intrinsic::x86_subborrow_64;
16257 break;
16258 }
16259
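// The intrinsic returns {carry-out, result}; store the result through Ops[3]
// and return the carry-out.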
16260 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16261 { Ops[0], Ops[1], Ops[2] });
16262 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16263 Ops[3]);
16264 return Builder.CreateExtractValue(Call, 0);
16265 }
16266
16267 case X86::BI__builtin_ia32_fpclassps128_mask:
16268 case X86::BI__builtin_ia32_fpclassps256_mask:
16269 case X86::BI__builtin_ia32_fpclassps512_mask:
16270 case X86::BI__builtin_ia32_fpclassph128_mask:
16271 case X86::BI__builtin_ia32_fpclassph256_mask:
16272 case X86::BI__builtin_ia32_fpclassph512_mask:
16273 case X86::BI__builtin_ia32_fpclasspd128_mask:
16274 case X86::BI__builtin_ia32_fpclasspd256_mask:
16275 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16276 unsigned NumElts =
16277 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16278 Value *MaskIn = Ops[2];
16279 Ops.erase(&Ops[2]);
16280
16281 Intrinsic::ID ID;
16282 switch (BuiltinID) {
16283 default: llvm_unreachable("Unsupported intrinsic!");
16284 case X86::BI__builtin_ia32_fpclassph128_mask:
16285 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16286 break;
16287 case X86::BI__builtin_ia32_fpclassph256_mask:
16288 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16289 break;
16290 case X86::BI__builtin_ia32_fpclassph512_mask:
16291 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16292 break;
16293 case X86::BI__builtin_ia32_fpclassps128_mask:
16294 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16295 break;
16296 case X86::BI__builtin_ia32_fpclassps256_mask:
16297 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16298 break;
16299 case X86::BI__builtin_ia32_fpclassps512_mask:
16300 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16301 break;
16302 case X86::BI__builtin_ia32_fpclasspd128_mask:
16303 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16304 break;
16305 case X86::BI__builtin_ia32_fpclasspd256_mask:
16306 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16307 break;
16308 case X86::BI__builtin_ia32_fpclasspd512_mask:
16309 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16310 break;
16311 }
16312
16313 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16314 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16315 }
16316
16317 case X86::BI__builtin_ia32_vp2intersect_q_512:
16318 case X86::BI__builtin_ia32_vp2intersect_q_256:
16319 case X86::BI__builtin_ia32_vp2intersect_q_128:
16320 case X86::BI__builtin_ia32_vp2intersect_d_512:
16321 case X86::BI__builtin_ia32_vp2intersect_d_256:
16322 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16323 unsigned NumElts =
16324 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16325 Intrinsic::ID ID;
16326
16327 switch (BuiltinID) {
16328 default: llvm_unreachable("Unsupported intrinsic!");
16329 case X86::BI__builtin_ia32_vp2intersect_q_512:
16330 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16331 break;
16332 case X86::BI__builtin_ia32_vp2intersect_q_256:
16333 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16334 break;
16335 case X86::BI__builtin_ia32_vp2intersect_q_128:
16336 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16337 break;
16338 case X86::BI__builtin_ia32_vp2intersect_d_512:
16339 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16340 break;
16341 case X86::BI__builtin_ia32_vp2intersect_d_256:
16342 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16343 break;
16344 case X86::BI__builtin_ia32_vp2intersect_d_128:
16345 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16346 break;
16347 }
16348
16349 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16350 Value *Result = Builder.CreateExtractValue(Call, 0);
16351 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16352 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16353
16354 Result = Builder.CreateExtractValue(Call, 1);
16355 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16356 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16357 }
16358
16359 case X86::BI__builtin_ia32_vpmultishiftqb128:
16360 case X86::BI__builtin_ia32_vpmultishiftqb256:
16361 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16362 Intrinsic::ID ID;
16363 switch (BuiltinID) {
16364 default: llvm_unreachable("Unsupported intrinsic!");
16365 case X86::BI__builtin_ia32_vpmultishiftqb128:
16366 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16367 break;
16368 case X86::BI__builtin_ia32_vpmultishiftqb256:
16369 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16370 break;
16371 case X86::BI__builtin_ia32_vpmultishiftqb512:
16372 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16373 break;
16374 }
16375
16376 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16377 }
16378
16379 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16380 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16381 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16382 unsigned NumElts =
16383 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16384 Value *MaskIn = Ops[2];
16385 Ops.erase(&Ops[2]);
16386
16387 Intrinsic::ID ID;
16388 switch (BuiltinID) {
16389 default: llvm_unreachable("Unsupported intrinsic!");
16390 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16391 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16392 break;
16393 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16394 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16395 break;
16396 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16397 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16398 break;
16399 }
16400
16401 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16402 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16403 }
16404
16405 // packed comparison intrinsics
16406 case X86::BI__builtin_ia32_cmpeqps:
16407 case X86::BI__builtin_ia32_cmpeqpd:
16408 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16409 case X86::BI__builtin_ia32_cmpltps:
16410 case X86::BI__builtin_ia32_cmpltpd:
16411 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16412 case X86::BI__builtin_ia32_cmpleps:
16413 case X86::BI__builtin_ia32_cmplepd:
16414 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16415 case X86::BI__builtin_ia32_cmpunordps:
16416 case X86::BI__builtin_ia32_cmpunordpd:
16417 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16418 case X86::BI__builtin_ia32_cmpneqps:
16419 case X86::BI__builtin_ia32_cmpneqpd:
16420 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16421 case X86::BI__builtin_ia32_cmpnltps:
16422 case X86::BI__builtin_ia32_cmpnltpd:
16423 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16424 case X86::BI__builtin_ia32_cmpnleps:
16425 case X86::BI__builtin_ia32_cmpnlepd:
16426 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16427 case X86::BI__builtin_ia32_cmpordps:
16428 case X86::BI__builtin_ia32_cmpordpd:
16429 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16430 case X86::BI__builtin_ia32_cmpph128_mask:
16431 case X86::BI__builtin_ia32_cmpph256_mask:
16432 case X86::BI__builtin_ia32_cmpph512_mask:
16433 case X86::BI__builtin_ia32_cmpps128_mask:
16434 case X86::BI__builtin_ia32_cmpps256_mask:
16435 case X86::BI__builtin_ia32_cmpps512_mask:
16436 case X86::BI__builtin_ia32_cmppd128_mask:
16437 case X86::BI__builtin_ia32_cmppd256_mask:
16438 case X86::BI__builtin_ia32_cmppd512_mask:
16439 case X86::BI__builtin_ia32_vcmppd256_round_mask:
16440 case X86::BI__builtin_ia32_vcmpps256_round_mask:
16441 case X86::BI__builtin_ia32_vcmpph256_round_mask:
16442 IsMaskFCmp = true;
16443 [[fallthrough]];
16444 case X86::BI__builtin_ia32_cmpps:
16445 case X86::BI__builtin_ia32_cmpps256:
16446 case X86::BI__builtin_ia32_cmppd:
16447 case X86::BI__builtin_ia32_cmppd256: {
16448 // Lower vector comparisons to fcmp instructions, ignoring both the
16449 // requested signalling behaviour and the requested rounding mode.
16450 // This is only possible if the fp-model is not strict and FENV_ACCESS is
16451 // off.
16452
16453 // The third argument is the comparison condition, an integer in the
16454 // range [0, 31].
16455 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16456
16457 // Lowering to IR fcmp instruction.
16458 // Ignoring requested signaling behaviour,
16459 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16460 FCmpInst::Predicate Pred;
16461 bool IsSignaling;
16462 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16463 // behavior is inverted. We'll handle that after the switch.
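 // Illustrative example (an assumption for exposition, not taken from the
 // switch below): _CMP_EQ_OQ (0x00) and _CMP_EQ_OS (0x10) both select
 // FCMP_OEQ; bit 4 only flips IsSignaling.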
16464 switch (CC & 0xf) {
16465 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16466 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16467 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16468 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16469 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16470 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16471 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16472 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16473 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16474 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16475 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16476 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16477 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16478 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16479 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16480 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16481 default: llvm_unreachable("Unhandled CC");
16482 }
16483
16484 // Invert the signalling behavior for 16-31.
16485 if (CC & 0x10)
16486 IsSignaling = !IsSignaling;
16487
16488 // If the predicate is true or false and we're using constrained intrinsics,
16489 // we don't have a compare intrinsic we can use. Just use the legacy X86
16490 // specific intrinsic.
16491 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16492 // use the legacy X86 specific intrinsic.
16493 if (Builder.getIsFPConstrained() &&
16494 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16495 IsMaskFCmp)) {
16496
16497 Intrinsic::ID IID;
16498 switch (BuiltinID) {
16499 default: llvm_unreachable("Unexpected builtin");
16500 case X86::BI__builtin_ia32_cmpps:
16501 IID = Intrinsic::x86_sse_cmp_ps;
16502 break;
16503 case X86::BI__builtin_ia32_cmpps256:
16504 IID = Intrinsic::x86_avx_cmp_ps_256;
16505 break;
16506 case X86::BI__builtin_ia32_cmppd:
16507 IID = Intrinsic::x86_sse2_cmp_pd;
16508 break;
16509 case X86::BI__builtin_ia32_cmppd256:
16510 IID = Intrinsic::x86_avx_cmp_pd_256;
16511 break;
16512 case X86::BI__builtin_ia32_cmpph128_mask:
16513 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16514 break;
16515 case X86::BI__builtin_ia32_cmpph256_mask:
16516 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16517 break;
16518 case X86::BI__builtin_ia32_cmpph512_mask:
16519 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16520 break;
16521 case X86::BI__builtin_ia32_cmpps512_mask:
16522 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16523 break;
16524 case X86::BI__builtin_ia32_cmppd512_mask:
16525 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16526 break;
16527 case X86::BI__builtin_ia32_cmpps128_mask:
16528 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16529 break;
16530 case X86::BI__builtin_ia32_cmpps256_mask:
16531 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16532 break;
16533 case X86::BI__builtin_ia32_cmppd128_mask:
16534 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16535 break;
16536 case X86::BI__builtin_ia32_cmppd256_mask:
16537 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16538 break;
16539 }
16540
16541 Function *Intr = CGM.getIntrinsic(IID);
16542 if (IsMaskFCmp) {
16543 unsigned NumElts =
16544 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16545 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16546 Value *Cmp = Builder.CreateCall(Intr, Ops);
16547 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16548 }
16549
16550 return Builder.CreateCall(Intr, Ops);
16551 }
16552
16553 // Builtins without the _mask suffix return a vector of integers
16554 // of the same width as the input vectors
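 // (For the masked builtins, EmitX86MaskedCompareResult below turns the
 // <N x i1> compare result into the corresponding integer mask value.)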
16555 if (IsMaskFCmp) {
16556 // We ignore SAE if strict FP is disabled. We only keep precise
16557 // exception behavior under strict FP.
16558 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16559 // object will be required.
16560 unsigned NumElts =
16561 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16562 Value *Cmp;
16563 if (IsSignaling)
16564 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16565 else
16566 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16567 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16568 }
16569
16570 return getVectorFCmpIR(Pred, IsSignaling);
16571 }
16572
16573 // SSE scalar comparison intrinsics
16574 case X86::BI__builtin_ia32_cmpeqss:
16575 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16576 case X86::BI__builtin_ia32_cmpltss:
16577 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16578 case X86::BI__builtin_ia32_cmpless:
16579 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16580 case X86::BI__builtin_ia32_cmpunordss:
16581 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16582 case X86::BI__builtin_ia32_cmpneqss:
16583 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16584 case X86::BI__builtin_ia32_cmpnltss:
16585 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16586 case X86::BI__builtin_ia32_cmpnless:
16587 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16588 case X86::BI__builtin_ia32_cmpordss:
16589 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16590 case X86::BI__builtin_ia32_cmpeqsd:
16591 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16592 case X86::BI__builtin_ia32_cmpltsd:
16593 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16594 case X86::BI__builtin_ia32_cmplesd:
16595 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16596 case X86::BI__builtin_ia32_cmpunordsd:
16597 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16598 case X86::BI__builtin_ia32_cmpneqsd:
16599 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16600 case X86::BI__builtin_ia32_cmpnltsd:
16601 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16602 case X86::BI__builtin_ia32_cmpnlesd:
16603 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16604 case X86::BI__builtin_ia32_cmpordsd:
16605 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16606
16607 // f16c half2float intrinsics
16608 case X86::BI__builtin_ia32_vcvtph2ps:
16609 case X86::BI__builtin_ia32_vcvtph2ps256:
16610 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16611 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16612 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16613 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16614 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16615 }
16616
16617 // AVX512 bf16 intrinsics
16618 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16619 Ops[2] = getMaskVecValue(
16620 *this, Ops[2],
16621 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16622 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16623 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16624 }
16625 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16626 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16627
16628 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16629 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16630 Intrinsic::ID IID;
16631 switch (BuiltinID) {
16632 default: llvm_unreachable("Unsupported intrinsic!");
16633 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16634 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16635 break;
16636 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16637 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16638 break;
16639 }
16640 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16641 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16642 }
16643
16644 case X86::BI__cpuid:
16645 case X86::BI__cpuidex: {
16646 Value *FuncId = EmitScalarExpr(E->getArg(1));
16647 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16648 ? EmitScalarExpr(E->getArg(2))
16649 : llvm::ConstantInt::get(Int32Ty, 0);
16650
16651 llvm::StructType *CpuidRetTy =
16652 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16653 llvm::FunctionType *FTy =
16654 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16655
16656 StringRef Asm, Constraints;
16657 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16658 Asm = "cpuid";
16659 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16660 } else {
16661 // x86-64 uses %rbx as the base register, so preserve it.
16662 Asm = "xchgq %rbx, ${1:q}\n"
16663 "cpuid\n"
16664 "xchgq %rbx, ${1:q}";
16665 Constraints = "={ax},=r,={cx},={dx},0,2";
16666 }
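 // Illustrative usage of the MSVC intrinsic being lowered here (sketch, not
 // part of this file):
 //   int Info[4];
 //   __cpuid(Info, 1); // Info[0..3] receive EAX, EBX, ECX, EDX for leaf 1.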
16667
16668 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16669 /*hasSideEffects=*/false);
16670 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16671 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16672 Value *Store = nullptr;
16673 for (unsigned i = 0; i < 4; i++) {
16674 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16675 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16676 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16677 }
16678
16679 // Return the last store instruction to signal that we have emitted
16680 // the intrinsic.
16681 return Store;
16682 }
16683
16684 case X86::BI__emul:
16685 case X86::BI__emulu: {
16686 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16687 bool isSigned = (BuiltinID == X86::BI__emul);
16688 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16689 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16690 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16691 }
16692 case X86::BI__mulh:
16693 case X86::BI__umulh:
16694 case X86::BI_mul128:
16695 case X86::BI_umul128: {
16696 llvm::Type *ResType = ConvertType(E->getType());
16697 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16698
16699 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16700 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16701 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16702
16703 Value *MulResult, *HigherBits;
16704 if (IsSigned) {
16705 MulResult = Builder.CreateNSWMul(LHS, RHS);
16706 HigherBits = Builder.CreateAShr(MulResult, 64);
16707 } else {
16708 MulResult = Builder.CreateNUWMul(LHS, RHS);
16709 HigherBits = Builder.CreateLShr(MulResult, 64);
16710 }
16711 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16712
16713 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16714 return HigherBits;
16715
16716 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16717 Builder.CreateStore(HigherBits, HighBitsAddress);
16718 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16719 }
16720
16721 case X86::BI__faststorefence: {
16722 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16723 llvm::SyncScope::System);
16724 }
16725 case X86::BI__shiftleft128:
16726 case X86::BI__shiftright128: {
16727 llvm::Function *F = CGM.getIntrinsic(
16728 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16729 Int64Ty);
16730 // Flip low/high ops and zero-extend amount to matching type.
16731 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16732 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
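 // Illustrative: for Amt in [1, 63], __shiftleft128(Lo, Hi, Amt) yields
 // (Hi << Amt) | (Lo >> (64 - Amt)), i.e. the high half of (Hi:Lo) << Amt.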
16733 std::swap(Ops[0], Ops[1]);
16734 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16735 return Builder.CreateCall(F, Ops);
16736 }
16737 case X86::BI_ReadWriteBarrier:
16738 case X86::BI_ReadBarrier:
16739 case X86::BI_WriteBarrier: {
16740 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16741 llvm::SyncScope::SingleThread);
16742 }
16743
16744 case X86::BI_AddressOfReturnAddress: {
16745 Function *F =
16746 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16747 return Builder.CreateCall(F);
16748 }
16749 case X86::BI__stosb: {
16750 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16751 // instruction, but it will create a memset that won't be optimized away.
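 // Illustrative: __stosb(Dest, Val, N) behaves like a memset of N copies of
 // Val that cannot be elided by the optimizer.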
16752 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16753 }
16754 case X86::BI__ud2:
16755 // llvm.trap makes a ud2a instruction on x86.
16756 return EmitTrapCall(Intrinsic::trap);
16757 case X86::BI__int2c: {
16758 // This syscall signals a driver assertion failure in x86 NT kernels.
16759 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16760 llvm::InlineAsm *IA =
16761 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16762 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16763 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16764 llvm::Attribute::NoReturn);
16765 llvm::CallInst *CI = Builder.CreateCall(IA);
16766 CI->setAttributes(NoReturnAttr);
16767 return CI;
16768 }
16769 case X86::BI__readfsbyte:
16770 case X86::BI__readfsword:
16771 case X86::BI__readfsdword:
16772 case X86::BI__readfsqword: {
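 // Address space 257 is how the X86 backend models the FS segment (256 is
 // GS), so the volatile load below is FS-relative.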
16773 llvm::Type *IntTy = ConvertType(E->getType());
16774 Value *Ptr = Builder.CreateIntToPtr(
16775 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16776 LoadInst *Load = Builder.CreateAlignedLoad(
16777 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16778 Load->setVolatile(true);
16779 return Load;
16780 }
16781 case X86::BI__readgsbyte:
16782 case X86::BI__readgsword:
16783 case X86::BI__readgsdword:
16784 case X86::BI__readgsqword: {
16785 llvm::Type *IntTy = ConvertType(E->getType());
16786 Value *Ptr = Builder.CreateIntToPtr(
16787 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16788 LoadInst *Load = Builder.CreateAlignedLoad(
16789 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16790 Load->setVolatile(true);
16791 return Load;
16792 }
16793 case X86::BI__builtin_ia32_encodekey128_u32: {
16794 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16795
16796 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16797
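 // In this lowering, element 0 of the call result is the ENCODEKEY128 result
 // flag; elements 1-3 carry the 384-bit key handle, stored below at
 // consecutive 16-byte offsets.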
16798 for (int i = 0; i < 3; ++i) {
16799 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16800 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16801 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16802 }
16803
16804 return Builder.CreateExtractValue(Call, 0);
16805 }
16806 case X86::BI__builtin_ia32_encodekey256_u32: {
16807 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16808
16809 Value *Call =
16810 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16811
16812 for (int i = 0; i < 4; ++i) {
16813 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16814 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16815 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16816 }
16817
16818 return Builder.CreateExtractValue(Call, 0);
16819 }
16820 case X86::BI__builtin_ia32_aesenc128kl_u8:
16821 case X86::BI__builtin_ia32_aesdec128kl_u8:
16822 case X86::BI__builtin_ia32_aesenc256kl_u8:
16823 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16824 Intrinsic::ID IID;
16825 StringRef BlockName;
16826 switch (BuiltinID) {
16827 default:
16828 llvm_unreachable("Unexpected builtin");
16829 case X86::BI__builtin_ia32_aesenc128kl_u8:
16830 IID = Intrinsic::x86_aesenc128kl;
16831 BlockName = "aesenc128kl";
16832 break;
16833 case X86::BI__builtin_ia32_aesdec128kl_u8:
16834 IID = Intrinsic::x86_aesdec128kl;
16835 BlockName = "aesdec128kl";
16836 break;
16837 case X86::BI__builtin_ia32_aesenc256kl_u8:
16838 IID = Intrinsic::x86_aesenc256kl;
16839 BlockName = "aesenc256kl";
16840 break;
16841 case X86::BI__builtin_ia32_aesdec256kl_u8:
16842 IID = Intrinsic::x86_aesdec256kl;
16843 BlockName = "aesdec256kl";
16844 break;
16845 }
16846
16847 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16848
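 // Element 0 of the call result is used as a success flag and element 1 as
 // the output block; the error path below stores zero instead.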
16849 BasicBlock *NoError =
16850 createBasicBlock(BlockName + "_no_error", this->CurFn);
16851 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16852 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16853
16854 Value *Ret = Builder.CreateExtractValue(Call, 0);
16855 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16856 Value *Out = Builder.CreateExtractValue(Call, 1);
16857 Builder.CreateCondBr(Succ, NoError, Error);
16858
16859 Builder.SetInsertPoint(NoError);
16860 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16861 Builder.CreateBr(End);
16862
16863 Builder.SetInsertPoint(Error);
16864 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16865 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16866 Builder.CreateBr(End);
16867
16868 Builder.SetInsertPoint(End);
16869 return Builder.CreateExtractValue(Call, 0);
16870 }
16871 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16872 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16873 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16874 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16875 Intrinsic::ID IID;
16876 StringRef BlockName;
16877 switch (BuiltinID) {
16878 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16879 IID = Intrinsic::x86_aesencwide128kl;
16880 BlockName = "aesencwide128kl";
16881 break;
16882 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16883 IID = Intrinsic::x86_aesdecwide128kl;
16884 BlockName = "aesdecwide128kl";
16885 break;
16886 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16887 IID = Intrinsic::x86_aesencwide256kl;
16888 BlockName = "aesencwide256kl";
16889 break;
16890 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16891 IID = Intrinsic::x86_aesdecwide256kl;
16892 BlockName = "aesdecwide256kl";
16893 break;
16894 }
16895
16896 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16897 Value *InOps[9];
16898 InOps[0] = Ops[2];
16899 for (int i = 0; i != 8; ++i) {
16900 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16901 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16902 }
16903
16904 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16905
16906 BasicBlock *NoError =
16907 createBasicBlock(BlockName + "_no_error", this->CurFn);
16908 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16909 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16910
16911 Value *Ret = Builder.CreateExtractValue(Call, 0);
16912 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16913 Builder.CreateCondBr(Succ, NoError, Error);
16914
16915 Builder.SetInsertPoint(NoError);
16916 for (int i = 0; i != 8; ++i) {
16917 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16918 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16919 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16920 }
16921 Builder.CreateBr(End);
16922
16923 Builder.SetInsertPoint(Error);
16924 for (int i = 0; i != 8; ++i) {
16925 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16926 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16927 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16928 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16929 }
16930 Builder.CreateBr(End);
16931
16932 Builder.SetInsertPoint(End);
16933 return Builder.CreateExtractValue(Call, 0);
16934 }
16935 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16936 IsConjFMA = true;
16937 [[fallthrough]];
16938 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16939 Intrinsic::ID IID = IsConjFMA
16940 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16941 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16942 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16943 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16944 }
16945 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
16946 IsConjFMA = true;
16947 [[fallthrough]];
16948 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
16949 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
16950 : Intrinsic::x86_avx10_mask_vfmaddcph256;
16951 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16952 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16953 }
16954 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16955 IsConjFMA = true;
16956 [[fallthrough]];
16957 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16958 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16959 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16960 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16961 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16962 return EmitX86Select(*this, And, Call, Ops[0]);
16963 }
16964 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16965 IsConjFMA = true;
16966 [[fallthrough]];
16967 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16968 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16969 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16970 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16971 static constexpr int Mask[] = {0, 5, 6, 7};
16972 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16973 }
16974 case X86::BI__builtin_ia32_prefetchi:
16975 return Builder.CreateCall(
16976 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16977 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16978 llvm::ConstantInt::get(Int32Ty, 0)});
16979 }
16980}
16981
16982Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16983 const CallExpr *E) {
16984 // Do not emit the builtin arguments inside the argument list of a function
16985 // call, because the evaluation order of function arguments is not specified
16986 // in C++. This is important when testing, to ensure the arguments are
16987 // emitted in the same order every time. E.g.:
16988 // Instead of:
16989 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16990 // EmitScalarExpr(E->getArg(1)), "swdiv");
16991 // Use:
16992 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16993 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16994 // return Builder.CreateFDiv(Op0, Op1, "swdiv");
16995
16996 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16997
16998#include "llvm/TargetParser/PPCTargetParser.def"
16999 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17000 unsigned Mask, CmpInst::Predicate CompOp,
17001 unsigned OpValue) -> Value * {
17002 if (SupportMethod == BUILTIN_PPC_FALSE)
17003 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17004
17005 if (SupportMethod == BUILTIN_PPC_TRUE)
17006 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17007
17008 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17009
17010 llvm::Value *FieldValue = nullptr;
17011 if (SupportMethod == USE_SYS_CONF) {
17012 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17013 llvm::Constant *SysConf =
17014 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17015
17016 // Grab the appropriate field from _system_configuration.
17017 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17018 ConstantInt::get(Int32Ty, FieldIdx)};
17019
17020 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17021 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17022 CharUnits::fromQuantity(4));
17023 } else if (SupportMethod == SYS_CALL) {
17024 llvm::FunctionType *FTy =
17025 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17026 llvm::FunctionCallee Func =
17027 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17028
17029 FieldValue =
17030 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17031 }
17032 assert(FieldValue &&
17033 "SupportMethod value is not defined in PPCTargetParser.def.");
17034
17035 if (Mask)
17036 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17037
17038 llvm::Type *ValueType = FieldValue->getType();
17039 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17040 assert(
17041 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17042 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17043
17044 return Builder.CreateICmp(
17045 CompOp, FieldValue,
17046 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17047 };
17048
17049 switch (BuiltinID) {
17050 default: return nullptr;
17051
17052 case Builtin::BI__builtin_cpu_is: {
17053 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17054 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17055 llvm::Triple Triple = getTarget().getTriple();
17056
17057 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17058 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17059
17060 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17061 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17062#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17063 AIXID) \
17064 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17065#include "llvm/TargetParser/PPCTargetParser.def"
17066 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17067 BUILTIN_PPC_UNSUPPORTED, 0}));
17068
17069 if (Triple.isOSAIX()) {
17070 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17071 "Invalid CPU name. Missed by SemaChecking?");
17072 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17073 ICmpInst::ICMP_EQ, AIXIDValue);
17074 }
17075
17076 assert(Triple.isOSLinux() &&
17077 "__builtin_cpu_is() is only supported for AIX and Linux.");
17078
17079 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17080 "Invalid CPU name. Missed by SemaChecking?");
17081
17082 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17083 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17084
17085 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17086 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17087 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17088 return Builder.CreateICmpEQ(TheCall,
17089 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17090 }
17091 case Builtin::BI__builtin_cpu_supports: {
17092 llvm::Triple Triple = getTarget().getTriple();
17093 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17094 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17095 if (Triple.isOSAIX()) {
17096 unsigned SupportMethod, FieldIdx, Mask, Value;
17097 CmpInst::Predicate CompOp;
17098 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17099 unsigned>
17100 CPUSupportType;
17101 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17102 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17103#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17104 VALUE) \
17105 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17106#include "llvm/TargetParser/PPCTargetParser.def"
17107 .Default({BUILTIN_PPC_FALSE, 0, 0,
17108 CmpInst::Predicate(), 0}));
17109 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17110 Value);
17111 }
17112
17113 assert(Triple.isOSLinux() &&
17114 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17115 unsigned FeatureWord;
17116 unsigned BitMask;
17117 std::tie(FeatureWord, BitMask) =
17118 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17119#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17120 .Case(Name, {FA_WORD, Bitmask})
17121#include "llvm/TargetParser/PPCTargetParser.def"
17122 .Default({0, 0});
17123 if (!BitMask)
17124 return Builder.getFalse();
17125 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17126 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17127 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17128 Value *Mask =
17129 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17130 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17131#undef PPC_FAWORD_HWCAP
17132#undef PPC_FAWORD_HWCAP2
17133#undef PPC_FAWORD_CPUID
17134 }
17135
17136 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17137 // call __builtin_readcyclecounter.
17138 case PPC::BI__builtin_ppc_get_timebase:
17139 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17140
17141 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17142 case PPC::BI__builtin_altivec_lvx:
17143 case PPC::BI__builtin_altivec_lvxl:
17144 case PPC::BI__builtin_altivec_lvebx:
17145 case PPC::BI__builtin_altivec_lvehx:
17146 case PPC::BI__builtin_altivec_lvewx:
17147 case PPC::BI__builtin_altivec_lvsl:
17148 case PPC::BI__builtin_altivec_lvsr:
17149 case PPC::BI__builtin_vsx_lxvd2x:
17150 case PPC::BI__builtin_vsx_lxvw4x:
17151 case PPC::BI__builtin_vsx_lxvd2x_be:
17152 case PPC::BI__builtin_vsx_lxvw4x_be:
17153 case PPC::BI__builtin_vsx_lxvl:
17154 case PPC::BI__builtin_vsx_lxvll:
17155 {
17156 SmallVector<Value *, 2> Ops;
17157 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17158 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17159 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17160 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17161 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17162 Ops.pop_back();
17163 }
17164
17165 switch (BuiltinID) {
17166 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17167 case PPC::BI__builtin_altivec_lvx:
17168 ID = Intrinsic::ppc_altivec_lvx;
17169 break;
17170 case PPC::BI__builtin_altivec_lvxl:
17171 ID = Intrinsic::ppc_altivec_lvxl;
17172 break;
17173 case PPC::BI__builtin_altivec_lvebx:
17174 ID = Intrinsic::ppc_altivec_lvebx;
17175 break;
17176 case PPC::BI__builtin_altivec_lvehx:
17177 ID = Intrinsic::ppc_altivec_lvehx;
17178 break;
17179 case PPC::BI__builtin_altivec_lvewx:
17180 ID = Intrinsic::ppc_altivec_lvewx;
17181 break;
17182 case PPC::BI__builtin_altivec_lvsl:
17183 ID = Intrinsic::ppc_altivec_lvsl;
17184 break;
17185 case PPC::BI__builtin_altivec_lvsr:
17186 ID = Intrinsic::ppc_altivec_lvsr;
17187 break;
17188 case PPC::BI__builtin_vsx_lxvd2x:
17189 ID = Intrinsic::ppc_vsx_lxvd2x;
17190 break;
17191 case PPC::BI__builtin_vsx_lxvw4x:
17192 ID = Intrinsic::ppc_vsx_lxvw4x;
17193 break;
17194 case PPC::BI__builtin_vsx_lxvd2x_be:
17195 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17196 break;
17197 case PPC::BI__builtin_vsx_lxvw4x_be:
17198 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17199 break;
17200 case PPC::BI__builtin_vsx_lxvl:
17201 ID = Intrinsic::ppc_vsx_lxvl;
17202 break;
17203 case PPC::BI__builtin_vsx_lxvll:
17204 ID = Intrinsic::ppc_vsx_lxvll;
17205 break;
17206 }
17207 llvm::Function *F = CGM.getIntrinsic(ID);
17208 return Builder.CreateCall(F, Ops, "");
17209 }
17210
17211 // vec_st, vec_xst_be
17212 case PPC::BI__builtin_altivec_stvx:
17213 case PPC::BI__builtin_altivec_stvxl:
17214 case PPC::BI__builtin_altivec_stvebx:
17215 case PPC::BI__builtin_altivec_stvehx:
17216 case PPC::BI__builtin_altivec_stvewx:
17217 case PPC::BI__builtin_vsx_stxvd2x:
17218 case PPC::BI__builtin_vsx_stxvw4x:
17219 case PPC::BI__builtin_vsx_stxvd2x_be:
17220 case PPC::BI__builtin_vsx_stxvw4x_be:
17221 case PPC::BI__builtin_vsx_stxvl:
17222 case PPC::BI__builtin_vsx_stxvll:
17223 {
17224 SmallVector<Value *, 3> Ops;
17225 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17226 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17227 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17228 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17229 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17230 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17231 Ops.pop_back();
17232 }
17233
17234 switch (BuiltinID) {
17235 default: llvm_unreachable("Unsupported st intrinsic!");
17236 case PPC::BI__builtin_altivec_stvx:
17237 ID = Intrinsic::ppc_altivec_stvx;
17238 break;
17239 case PPC::BI__builtin_altivec_stvxl:
17240 ID = Intrinsic::ppc_altivec_stvxl;
17241 break;
17242 case PPC::BI__builtin_altivec_stvebx:
17243 ID = Intrinsic::ppc_altivec_stvebx;
17244 break;
17245 case PPC::BI__builtin_altivec_stvehx:
17246 ID = Intrinsic::ppc_altivec_stvehx;
17247 break;
17248 case PPC::BI__builtin_altivec_stvewx:
17249 ID = Intrinsic::ppc_altivec_stvewx;
17250 break;
17251 case PPC::BI__builtin_vsx_stxvd2x:
17252 ID = Intrinsic::ppc_vsx_stxvd2x;
17253 break;
17254 case PPC::BI__builtin_vsx_stxvw4x:
17255 ID = Intrinsic::ppc_vsx_stxvw4x;
17256 break;
17257 case PPC::BI__builtin_vsx_stxvd2x_be:
17258 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17259 break;
17260 case PPC::BI__builtin_vsx_stxvw4x_be:
17261 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17262 break;
17263 case PPC::BI__builtin_vsx_stxvl:
17264 ID = Intrinsic::ppc_vsx_stxvl;
17265 break;
17266 case PPC::BI__builtin_vsx_stxvll:
17267 ID = Intrinsic::ppc_vsx_stxvll;
17268 break;
17269 }
17270 llvm::Function *F = CGM.getIntrinsic(ID);
17271 return Builder.CreateCall(F, Ops, "");
17272 }
17273 case PPC::BI__builtin_vsx_ldrmb: {
17274 // This essentially boils down to performing an unaligned VMX load sequence
17275 // (so as to avoid crossing a page boundary) and then shuffling the elements
17276 // into the right side of the vector register.
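 // Sketch of the approach below: load the 16-byte blocks containing the first
 // and last requested bytes with lvx, then combine them with vperm using a
 // mask from lvsl/lvsr so the NumBytes bytes end up contiguous in one register.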
17277 Value *Op0 = EmitScalarExpr(E->getArg(0));
17278 Value *Op1 = EmitScalarExpr(E->getArg(1));
17279 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17280 llvm::Type *ResTy = ConvertType(E->getType());
17281 bool IsLE = getTarget().isLittleEndian();
17282
17283 // If the user wants the entire vector, just load the entire vector.
17284 if (NumBytes == 16) {
17285 Value *LD =
17286 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17287 if (!IsLE)
17288 return LD;
17289
17290 // Reverse the bytes on LE.
17291 SmallVector<int, 16> RevMask;
17292 for (int Idx = 0; Idx < 16; Idx++)
17293 RevMask.push_back(15 - Idx);
17294 return Builder.CreateShuffleVector(LD, LD, RevMask);
17295 }
17296
17297 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17298 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17299 : Intrinsic::ppc_altivec_lvsl);
17300 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17301 Value *HiMem = Builder.CreateGEP(
17302 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17303 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17304 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17305 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17306
17307 Op0 = IsLE ? HiLd : LoLd;
17308 Op1 = IsLE ? LoLd : HiLd;
17309 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17310 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17311
17312 if (IsLE) {
17313 SmallVector<int, 16> Consts;
17314 for (int Idx = 0; Idx < 16; Idx++) {
17315 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17316 : 16 - (NumBytes - Idx);
17317 Consts.push_back(Val);
17318 }
17319 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17320 Zero, Consts);
17321 }
17322 SmallVector<Constant *, 16> Consts;
17323 for (int Idx = 0; Idx < 16; Idx++)
17324 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17325 Value *Mask2 = ConstantVector::get(Consts);
17326 return Builder.CreateBitCast(
17327 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17328 }
17329 case PPC::BI__builtin_vsx_strmb: {
17330 Value *Op0 = EmitScalarExpr(E->getArg(0));
17331 Value *Op1 = EmitScalarExpr(E->getArg(1));
17332 Value *Op2 = EmitScalarExpr(E->getArg(2));
17333 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17334 bool IsLE = getTarget().isLittleEndian();
17335 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17336 // If storing the whole vector, simply store it on BE; on LE, reverse the
17337 // bytes and then store.
17338 if (Width == 16) {
17339 Value *StVec = Op2;
17340 if (IsLE) {
17341 SmallVector<int, 16> RevMask;
17342 for (int Idx = 0; Idx < 16; Idx++)
17343 RevMask.push_back(15 - Idx);
17344 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17345 }
17346 return Builder.CreateStore(
17347 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17348 }
17349 auto *ConvTy = Int64Ty;
17350 unsigned NumElts = 0;
17351 switch (Width) {
17352 default:
17353 llvm_unreachable("width for stores must be a power of 2");
17354 case 8:
17355 ConvTy = Int64Ty;
17356 NumElts = 2;
17357 break;
17358 case 4:
17359 ConvTy = Int32Ty;
17360 NumElts = 4;
17361 break;
17362 case 2:
17363 ConvTy = Int16Ty;
17364 NumElts = 8;
17365 break;
17366 case 1:
17367 ConvTy = Int8Ty;
17368 NumElts = 16;
17369 break;
17370 }
17371 Value *Vec = Builder.CreateBitCast(
17372 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17373 Value *Ptr =
17374 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17375 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17376 if (IsLE && Width > 1) {
17377 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17378 Elt = Builder.CreateCall(F, Elt);
17379 }
17380 return Builder.CreateStore(
17381 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17382 };
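 // Illustrative decomposition: NumBytes == 7 is emitted below as one 4-byte,
 // one 2-byte and one 1-byte store via StoreSubVec.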
17383 unsigned Stored = 0;
17384 unsigned RemainingBytes = NumBytes;
17385 Value *Result;
17386 if (NumBytes == 16)
17387 return StoreSubVec(16, 0, 0);
17388 if (NumBytes >= 8) {
17389 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17390 RemainingBytes -= 8;
17391 Stored += 8;
17392 }
17393 if (RemainingBytes >= 4) {
17394 Result = StoreSubVec(4, NumBytes - Stored - 4,
17395 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17396 RemainingBytes -= 4;
17397 Stored += 4;
17398 }
17399 if (RemainingBytes >= 2) {
17400 Result = StoreSubVec(2, NumBytes - Stored - 2,
17401 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17402 RemainingBytes -= 2;
17403 Stored += 2;
17404 }
17405 if (RemainingBytes)
17406 Result =
17407 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17408 return Result;
17409 }
17410 // Square root
17411 case PPC::BI__builtin_vsx_xvsqrtsp:
17412 case PPC::BI__builtin_vsx_xvsqrtdp: {
17413 llvm::Type *ResultType = ConvertType(E->getType());
17414 Value *X = EmitScalarExpr(E->getArg(0));
17415 if (Builder.getIsFPConstrained()) {
17416 llvm::Function *F = CGM.getIntrinsic(
17417 Intrinsic::experimental_constrained_sqrt, ResultType);
17418 return Builder.CreateConstrainedFPCall(F, X);
17419 } else {
17420 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17421 return Builder.CreateCall(F, X);
17422 }
17423 }
17424 // Count leading zeros
17425 case PPC::BI__builtin_altivec_vclzb:
17426 case PPC::BI__builtin_altivec_vclzh:
17427 case PPC::BI__builtin_altivec_vclzw:
17428 case PPC::BI__builtin_altivec_vclzd: {
17429 llvm::Type *ResultType = ConvertType(E->getType());
17430 Value *X = EmitScalarExpr(E->getArg(0));
17431 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17432 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17433 return Builder.CreateCall(F, {X, Undef});
17434 }
17435 case PPC::BI__builtin_altivec_vctzb:
17436 case PPC::BI__builtin_altivec_vctzh:
17437 case PPC::BI__builtin_altivec_vctzw:
17438 case PPC::BI__builtin_altivec_vctzd: {
17439 llvm::Type *ResultType = ConvertType(E->getType());
17440 Value *X = EmitScalarExpr(E->getArg(0));
17441 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17442 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17443 return Builder.CreateCall(F, {X, Undef});
17444 }
17445 case PPC::BI__builtin_altivec_vinsd:
17446 case PPC::BI__builtin_altivec_vinsw:
17447 case PPC::BI__builtin_altivec_vinsd_elt:
17448 case PPC::BI__builtin_altivec_vinsw_elt: {
17449 llvm::Type *ResultType = ConvertType(E->getType());
17450 Value *Op0 = EmitScalarExpr(E->getArg(0));
17451 Value *Op1 = EmitScalarExpr(E->getArg(1));
17452 Value *Op2 = EmitScalarExpr(E->getArg(2));
17453
17454 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17455 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17456
17457 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17458 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17459
17460 // The third argument must be a compile time constant.
17461 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17462 assert(ArgCI &&
17463 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17464
17465 // The valid range for the third argument depends on the input type and the
17466 // builtin being called.
17467 int ValidMaxValue = 0;
17468 if (IsUnaligned)
17469 ValidMaxValue = (Is32bit) ? 12 : 8;
17470 else
17471 ValidMaxValue = (Is32bit) ? 3 : 1;
17472
17473 // Get value of third argument.
17474 int64_t ConstArg = ArgCI->getSExtValue();
17475
17476 // Compose range checking error message.
17477 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17478 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17479 RangeErrMsg += " is outside of the valid range [0, ";
17480 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17481
17482 // Issue error if third argument is not within the valid range.
17483 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17484 CGM.Error(E->getExprLoc(), RangeErrMsg);
17485
17486 // The input to vec_replace_elt is an element index; convert it to a byte index.
17487 if (!IsUnaligned) {
17488 ConstArg *= Is32bit ? 4 : 8;
17489 // Fix the constant according to endianness.
17490 if (getTarget().isLittleEndian())
17491 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17492 }
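 // Illustrative: with 32-bit elements on little-endian, element index 0
 // becomes byte offset 12 (0 * 4, then 12 - 0).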
17493
17494 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17495 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17496 // Casting input to vector int as per intrinsic definition.
17497 Op0 =
17498 Is32bit
17499 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17500 : Builder.CreateBitCast(Op0,
17501 llvm::FixedVectorType::get(Int64Ty, 2));
17502 return Builder.CreateBitCast(
17503 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17504 }
17505 case PPC::BI__builtin_altivec_vpopcntb:
17506 case PPC::BI__builtin_altivec_vpopcnth:
17507 case PPC::BI__builtin_altivec_vpopcntw:
17508 case PPC::BI__builtin_altivec_vpopcntd: {
17509 llvm::Type *ResultType = ConvertType(E->getType());
17510 Value *X = EmitScalarExpr(E->getArg(0));
17511 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17512 return Builder.CreateCall(F, X);
17513 }
17514 case PPC::BI__builtin_altivec_vadduqm:
17515 case PPC::BI__builtin_altivec_vsubuqm: {
17516 Value *Op0 = EmitScalarExpr(E->getArg(0));
17517 Value *Op1 = EmitScalarExpr(E->getArg(1));
17518 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17519 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17520 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17521 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17522 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17523 else
17524 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17525 }
17526 case PPC::BI__builtin_altivec_vaddcuq_c:
17527 case PPC::BI__builtin_altivec_vsubcuq_c: {
17528 SmallVector<Value *, 2> Ops;
17529 Value *Op0 = EmitScalarExpr(E->getArg(0));
17530 Value *Op1 = EmitScalarExpr(E->getArg(1));
17531 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17532 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17533 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17534 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17535 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17536 ? Intrinsic::ppc_altivec_vaddcuq
17537 : Intrinsic::ppc_altivec_vsubcuq;
17538 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17539 }
17540 case PPC::BI__builtin_altivec_vaddeuqm_c:
17541 case PPC::BI__builtin_altivec_vaddecuq_c:
17542 case PPC::BI__builtin_altivec_vsubeuqm_c:
17543 case PPC::BI__builtin_altivec_vsubecuq_c: {
17544 SmallVector<Value *, 3> Ops;
17545 Value *Op0 = EmitScalarExpr(E->getArg(0));
17546 Value *Op1 = EmitScalarExpr(E->getArg(1));
17547 Value *Op2 = EmitScalarExpr(E->getArg(2));
17548 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17549 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17550 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17551 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17552 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17553 switch (BuiltinID) {
17554 default:
17555 llvm_unreachable("Unsupported intrinsic!");
17556 case PPC::BI__builtin_altivec_vaddeuqm_c:
17557 ID = Intrinsic::ppc_altivec_vaddeuqm;
17558 break;
17559 case PPC::BI__builtin_altivec_vaddecuq_c:
17560 ID = Intrinsic::ppc_altivec_vaddecuq;
17561 break;
17562 case PPC::BI__builtin_altivec_vsubeuqm_c:
17563 ID = Intrinsic::ppc_altivec_vsubeuqm;
17564 break;
17565 case PPC::BI__builtin_altivec_vsubecuq_c:
17566 ID = Intrinsic::ppc_altivec_vsubecuq;
17567 break;
17568 }
17569 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17570 }
17571 case PPC::BI__builtin_ppc_rldimi:
17572 case PPC::BI__builtin_ppc_rlwimi: {
17573 Value *Op0 = EmitScalarExpr(E->getArg(0));
17574 Value *Op1 = EmitScalarExpr(E->getArg(1));
17575 Value *Op2 = EmitScalarExpr(E->getArg(2));
17576 Value *Op3 = EmitScalarExpr(E->getArg(3));
17577 // rldimi is a 64-bit instruction, so on 32-bit targets expand the intrinsic
17578 // before isel to leverage peephole optimizations and avoid legalization effort.
17579 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17580 !getTarget().getTriple().isPPC64()) {
17581 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17582 Op2 = Builder.CreateZExt(Op2, Int64Ty);
17583 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17584 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17585 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17586 }
17587 return Builder.CreateCall(
17588 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17589 ? Intrinsic::ppc_rldimi
17590 : Intrinsic::ppc_rlwimi),
17591 {Op0, Op1, Op2, Op3});
17592 }
17593 case PPC::BI__builtin_ppc_rlwnm: {
17594 Value *Op0 = EmitScalarExpr(E->getArg(0));
17595 Value *Op1 = EmitScalarExpr(E->getArg(1));
17596 Value *Op2 = EmitScalarExpr(E->getArg(2));
17597 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17598 {Op0, Op1, Op2});
17599 }
17600 case PPC::BI__builtin_ppc_poppar4:
17601 case PPC::BI__builtin_ppc_poppar8: {
17602 Value *Op0 = EmitScalarExpr(E->getArg(0));
17603 llvm::Type *ArgType = Op0->getType();
17604 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17605 Value *Tmp = Builder.CreateCall(F, Op0);
17606
17607 llvm::Type *ResultType = ConvertType(E->getType());
17608 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17609 if (Result->getType() != ResultType)
17610 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17611 "cast");
17612 return Result;
17613 }
17614 case PPC::BI__builtin_ppc_cmpb: {
17615 Value *Op0 = EmitScalarExpr(E->getArg(0));
17616 Value *Op1 = EmitScalarExpr(E->getArg(1));
17617 if (getTarget().getTriple().isPPC64()) {
17618 Function *F =
17619 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17620 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17621 }
17622 // For 32-bit targets, emit the code below:
17623 // %conv = trunc i64 %a to i32
17624 // %conv1 = trunc i64 %b to i32
17625 // %shr = lshr i64 %a, 32
17626 // %conv2 = trunc i64 %shr to i32
17627 // %shr3 = lshr i64 %b, 32
17628 // %conv4 = trunc i64 %shr3 to i32
17629 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17630 // %conv5 = zext i32 %0 to i64
17631 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17632 // %conv614 = zext i32 %1 to i64
17633 // %shl = shl nuw i64 %conv614, 32
17634 // %or = or i64 %shl, %conv5
17635 // ret i64 %or
17636 Function *F =
17637 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17638 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17639 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17640 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17641 Value *ArgOneHi =
17642 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17643 Value *ArgTwoHi =
17644 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17645 Value *ResLo = Builder.CreateZExt(
17646 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17647 Value *ResHiShift = Builder.CreateZExt(
17648 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17649 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17650 return Builder.CreateOr(ResLo, ResHi);
17651 }
17652 // Copy sign
17653 case PPC::BI__builtin_vsx_xvcpsgnsp:
17654 case PPC::BI__builtin_vsx_xvcpsgndp: {
17655 llvm::Type *ResultType = ConvertType(E->getType());
17656 Value *X = EmitScalarExpr(E->getArg(0));
17657 Value *Y = EmitScalarExpr(E->getArg(1));
17658 ID = Intrinsic::copysign;
17659 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17660 return Builder.CreateCall(F, {X, Y});
17661 }
17662 // Rounding/truncation
17663 case PPC::BI__builtin_vsx_xvrspip:
17664 case PPC::BI__builtin_vsx_xvrdpip:
17665 case PPC::BI__builtin_vsx_xvrdpim:
17666 case PPC::BI__builtin_vsx_xvrspim:
17667 case PPC::BI__builtin_vsx_xvrdpi:
17668 case PPC::BI__builtin_vsx_xvrspi:
17669 case PPC::BI__builtin_vsx_xvrdpic:
17670 case PPC::BI__builtin_vsx_xvrspic:
17671 case PPC::BI__builtin_vsx_xvrdpiz:
17672 case PPC::BI__builtin_vsx_xvrspiz: {
17673 llvm::Type *ResultType = ConvertType(E->getType());
17674 Value *X = EmitScalarExpr(E->getArg(0));
17675 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17676 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17677 ID = Builder.getIsFPConstrained()
17678 ? Intrinsic::experimental_constrained_floor
17679 : Intrinsic::floor;
17680 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17681 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17682 ID = Builder.getIsFPConstrained()
17683 ? Intrinsic::experimental_constrained_round
17684 : Intrinsic::round;
17685 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17686 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17687 ID = Builder.getIsFPConstrained()
17688 ? Intrinsic::experimental_constrained_rint
17689 : Intrinsic::rint;
17690 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17691 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17692 ID = Builder.getIsFPConstrained()
17693 ? Intrinsic::experimental_constrained_ceil
17694 : Intrinsic::ceil;
17695 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17696 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17697 ID = Builder.getIsFPConstrained()
17698 ? Intrinsic::experimental_constrained_trunc
17699 : Intrinsic::trunc;
17700 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17701 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17702 : Builder.CreateCall(F, X);
17703 }
17704
17705 // Absolute value
17706 case PPC::BI__builtin_vsx_xvabsdp:
17707 case PPC::BI__builtin_vsx_xvabssp: {
17708 llvm::Type *ResultType = ConvertType(E->getType());
17709 Value *X = EmitScalarExpr(E->getArg(0));
17710 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17711 return Builder.CreateCall(F, X);
17712 }
17713
17714 // Fastmath by default
17715 case PPC::BI__builtin_ppc_recipdivf:
17716 case PPC::BI__builtin_ppc_recipdivd:
17717 case PPC::BI__builtin_ppc_rsqrtf:
17718 case PPC::BI__builtin_ppc_rsqrtd: {
17719 FastMathFlags FMF = Builder.getFastMathFlags();
17720 Builder.getFastMathFlags().setFast();
17721 llvm::Type *ResultType = ConvertType(E->getType());
17722 Value *X = EmitScalarExpr(E->getArg(0));
17723
17724 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17725 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17726 Value *Y = EmitScalarExpr(E->getArg(1));
17727 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17728 Builder.getFastMathFlags() &= (FMF);
17729 return FDiv;
17730 }
17731 auto *One = ConstantFP::get(ResultType, 1.0);
17732 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17733 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17734 Builder.getFastMathFlags() &= (FMF);
17735 return FDiv;
17736 }
17737 case PPC::BI__builtin_ppc_alignx: {
17738 Value *Op0 = EmitScalarExpr(E->getArg(0));
17739 Value *Op1 = EmitScalarExpr(E->getArg(1));
17740 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17741 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17742 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17743 llvm::Value::MaximumAlignment);
17744
17745 emitAlignmentAssumption(Op1, E->getArg(1),
17746 /*The expr loc is sufficient.*/ SourceLocation(),
17747 AlignmentCI, nullptr);
17748 return Op1;
17749 }
17750 case PPC::BI__builtin_ppc_rdlam: {
17751 Value *Op0 = EmitScalarExpr(E->getArg(0));
17752 Value *Op1 = EmitScalarExpr(E->getArg(1));
17753 Value *Op2 = EmitScalarExpr(E->getArg(2));
17754 llvm::Type *Ty = Op0->getType();
17755 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17756 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17757 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17758 return Builder.CreateAnd(Rotate, Op2);
17759 }
17760 case PPC::BI__builtin_ppc_load2r: {
17761 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17762 Value *Op0 = EmitScalarExpr(E->getArg(0));
17763 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17764 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17765 }
17766 // FMA variations
17767 case PPC::BI__builtin_ppc_fnmsub:
17768 case PPC::BI__builtin_ppc_fnmsubs:
17769 case PPC::BI__builtin_vsx_xvmaddadp:
17770 case PPC::BI__builtin_vsx_xvmaddasp:
17771 case PPC::BI__builtin_vsx_xvnmaddadp:
17772 case PPC::BI__builtin_vsx_xvnmaddasp:
17773 case PPC::BI__builtin_vsx_xvmsubadp:
17774 case PPC::BI__builtin_vsx_xvmsubasp:
17775 case PPC::BI__builtin_vsx_xvnmsubadp:
17776 case PPC::BI__builtin_vsx_xvnmsubasp: {
17777 llvm::Type *ResultType = ConvertType(E->getType());
17778 Value *X = EmitScalarExpr(E->getArg(0));
17779 Value *Y = EmitScalarExpr(E->getArg(1));
17780 Value *Z = EmitScalarExpr(E->getArg(2));
17781 llvm::Function *F;
17782 if (Builder.getIsFPConstrained())
17783 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17784 else
17785 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17786 switch (BuiltinID) {
17787 case PPC::BI__builtin_vsx_xvmaddadp:
17788 case PPC::BI__builtin_vsx_xvmaddasp:
17789 if (Builder.getIsFPConstrained())
17790 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17791 else
17792 return Builder.CreateCall(F, {X, Y, Z});
17793 case PPC::BI__builtin_vsx_xvnmaddadp:
17794 case PPC::BI__builtin_vsx_xvnmaddasp:
17795 if (Builder.getIsFPConstrained())
17796 return Builder.CreateFNeg(
17797 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17798 else
17799 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17800 case PPC::BI__builtin_vsx_xvmsubadp:
17801 case PPC::BI__builtin_vsx_xvmsubasp:
17802 if (Builder.getIsFPConstrained())
17803 return Builder.CreateConstrainedFPCall(
17804 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17805 else
17806 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17807 case PPC::BI__builtin_ppc_fnmsub:
17808 case PPC::BI__builtin_ppc_fnmsubs:
17809 case PPC::BI__builtin_vsx_xvnmsubadp:
17810 case PPC::BI__builtin_vsx_xvnmsubasp:
17811 if (Builder.getIsFPConstrained())
17812 return Builder.CreateFNeg(
17813 Builder.CreateConstrainedFPCall(
17814 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17815 "neg");
17816 else
17817 return Builder.CreateCall(
17818 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17819 }
17820 llvm_unreachable("Unknown FMA operation");
17821 return nullptr; // Suppress no-return warning
17822 }
17823
17824 case PPC::BI__builtin_vsx_insertword: {
17825 Value *Op0 = EmitScalarExpr(E->getArg(0));
17826 Value *Op1 = EmitScalarExpr(E->getArg(1));
17827 Value *Op2 = EmitScalarExpr(E->getArg(2));
17828 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17829
17830 // The third argument is a compile-time constant int. It must be clamped
17831 // to the range [0, 12].
17832 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17833 assert(ArgCI &&
17834 "Third arg to xxinsertw intrinsic must be constant integer");
17835 const int64_t MaxIndex = 12;
17836 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17837
17838 // The builtin semantics don't exactly match the xxinsertw instruction's
17839 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17840 // word from the first argument, and inserts it in the second argument. The
17841 // instruction extracts the word from its second input register and inserts
17842 // it into its first input register, so swap the first and second arguments.
17843 std::swap(Op0, Op1);
17844
17845 // Need to cast the second argument from a vector of unsigned int to a
17846 // vector of long long.
17847 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17848
17849 if (getTarget().isLittleEndian()) {
17850 // Reverse the double words in the vector we will extract from.
17851 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17852 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17853
17854 // Reverse the index.
17855 Index = MaxIndex - Index;
17856 }
17857
17858 // Intrinsic expects the first arg to be a vector of int.
17859 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17860 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17861 return Builder.CreateCall(F, {Op0, Op1, Op2});
17862 }
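// Worked example of the endian handling above: a requested byte offset of 4
// is first clamped to [0, 12]; on a little-endian target the two doublewords
// of the shuffled operand are swapped and the offset becomes 12 - 4 = 8, so
// the big-endian-numbered xxinsertw instruction still touches the intended
// word.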
17863
17864 case PPC::BI__builtin_vsx_extractuword: {
17865 Value *Op0 = EmitScalarExpr(E->getArg(0));
17866 Value *Op1 = EmitScalarExpr(E->getArg(1));
17867 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17868
17869 // Intrinsic expects the first argument to be a vector of doublewords.
17870 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17871
17872 // The second argument is a compile time constant int that needs to
17873 // be clamped to the range [0, 12].
17874 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17875 assert(ArgCI &&
17876 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17877 const int64_t MaxIndex = 12;
17878 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17879
17880 if (getTarget().isLittleEndian()) {
17881 // Reverse the index.
17882 Index = MaxIndex - Index;
17883 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17884
17885 // Emit the call, then reverse the double words of the results vector.
17886 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17887
17888 Value *ShuffleCall =
17889 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17890 return ShuffleCall;
17891 } else {
17892 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17893 return Builder.CreateCall(F, {Op0, Op1});
17894 }
17895 }
17896
17897 case PPC::BI__builtin_vsx_xxpermdi: {
17898 Value *Op0 = EmitScalarExpr(E->getArg(0));
17899 Value *Op1 = EmitScalarExpr(E->getArg(1));
17900 Value *Op2 = EmitScalarExpr(E->getArg(2));
17901 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17902 assert(ArgCI && "Third arg must be constant integer!");
17903
17904 unsigned Index = ArgCI->getZExtValue();
17905 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17906 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17907
17908 // Account for endianness by treating this as just a shuffle, using the
17909 // same indices for both LE and BE in order to produce the expected results
17910 // in both cases.
17911 int ElemIdx0 = (Index & 2) >> 1;
17912 int ElemIdx1 = 2 + (Index & 1);
17913
17914 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17915 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17916 QualType BIRetType = E->getType();
17917 auto RetTy = ConvertType(BIRetType);
17918 return Builder.CreateBitCast(ShuffleCall, RetTy);
17919 }
17920
17921 case PPC::BI__builtin_vsx_xxsldwi: {
17922 Value *Op0 = EmitScalarExpr(E->getArg(0));
17923 Value *Op1 = EmitScalarExpr(E->getArg(1));
17924 Value *Op2 = EmitScalarExpr(E->getArg(2));
17925 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17926 assert(ArgCI && "Third argument must be a compile time constant");
17927 unsigned Index = ArgCI->getZExtValue() & 0x3;
17928 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17929 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17930
17931 // Create a shuffle mask
17932 int ElemIdx0;
17933 int ElemIdx1;
17934 int ElemIdx2;
17935 int ElemIdx3;
17936 if (getTarget().isLittleEndian()) {
17937 // Little endian element N comes from element 8+N-Index of the
17938 // concatenated wide vector (of course, using modulo arithmetic on
17939 // the total number of elements).
17940 ElemIdx0 = (8 - Index) % 8;
17941 ElemIdx1 = (9 - Index) % 8;
17942 ElemIdx2 = (10 - Index) % 8;
17943 ElemIdx3 = (11 - Index) % 8;
17944 } else {
17945 // Big endian ElemIdx<N> = Index + N
17946 ElemIdx0 = Index;
17947 ElemIdx1 = Index + 1;
17948 ElemIdx2 = Index + 2;
17949 ElemIdx3 = Index + 3;
17950 }
17951
17952 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17953 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17954 QualType BIRetType = E->getType();
17955 auto RetTy = ConvertType(BIRetType);
17956 return Builder.CreateBitCast(ShuffleCall, RetTy);
17957 }
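// Worked example of the mask computation above: for Index = 1 the big-endian
// shuffle mask is {1, 2, 3, 4}, while the little-endian mask is
// {(8-1)%8, (9-1)%8, (10-1)%8, (11-1)%8} = {7, 0, 1, 2}; both pick the same
// shifted window once the element numbering of the two source registers is
// taken into account.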
17958
17959 case PPC::BI__builtin_pack_vector_int128: {
17960 Value *Op0 = EmitScalarExpr(E->getArg(0));
17961 Value *Op1 = EmitScalarExpr(E->getArg(1));
17962 bool isLittleEndian = getTarget().isLittleEndian();
17963 Value *PoisonValue =
17964 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17965 Value *Res = Builder.CreateInsertElement(
17966 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17967 Res = Builder.CreateInsertElement(Res, Op1,
17968 (uint64_t)(isLittleEndian ? 0 : 1));
17969 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17970 }
17971
17972 case PPC::BI__builtin_unpack_vector_int128: {
17973 Value *Op0 = EmitScalarExpr(E->getArg(0));
17974 Value *Op1 = EmitScalarExpr(E->getArg(1));
17975 ConstantInt *Index = cast<ConstantInt>(Op1);
17976 Value *Unpacked = Builder.CreateBitCast(
17977 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17978
17979 if (getTarget().isLittleEndian())
17980 Index =
17981 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17982
17983 return Builder.CreateExtractElement(Unpacked, Index);
17984 }
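// Note on the two cases above: the pack builtin always places its first
// argument in the doubleword that ends up most significant after the bitcast
// (element 0 on BE, element 1 on LE), and the unpack builtin mirrors that by
// flipping the element index on LE, so index 0 names the most significant
// doubleword on either endianness.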
17985
17986 case PPC::BI__builtin_ppc_sthcx: {
17987 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17988 Value *Op0 = EmitScalarExpr(E->getArg(0));
17989 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17990 return Builder.CreateCall(F, {Op0, Op1});
17991 }
17992
17993 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17994 // Some of the MMA instructions accumulate their result into an existing
17995 // accumulator whereas the others generate a new accumulator. So we need
17996 // custom code generation to expand such a builtin call into a load of the
17997 // accumulator (if the corresponding instruction accumulates its result),
17998 // followed by the call to the intrinsic and a store of the result.
17999#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18000 case PPC::BI__builtin_##Name:
18001#include "clang/Basic/BuiltinsPPC.def"
18002 {
18003 SmallVector<Value *, 4> Ops;
18004 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18005 if (E->getArg(i)->getType()->isArrayType())
18006 Ops.push_back(
18007 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18008 else
18009 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18010 // The first argument of these builtins is a pointer used to store their
18011 // result. However, the LLVM intrinsics return their result in multiple
18012 // return values. So, here we emit code extracting these values from the
18013 // intrinsic results and storing them using that pointer.
18014 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18015 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18016 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18017 unsigned NumVecs = 2;
18018 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18019 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18020 NumVecs = 4;
18021 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18022 }
18023 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18024 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18025 Value *Vec = Builder.CreateLoad(Addr);
18026 Value *Call = Builder.CreateCall(F, {Vec});
18027 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18028 Value *Ptr = Ops[0];
18029 for (unsigned i=0; i<NumVecs; i++) {
18030 Value *Vec = Builder.CreateExtractValue(Call, i);
18031 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18032 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18033 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18034 }
18035 return Call;
18036 }
18037 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18038 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18039 // Reverse the order of the operands for LE, so the
18040 // same builtin call can be used on both LE and BE
18041 // without the need for the programmer to swap operands.
18042 // The operands are reversed starting from the second argument;
18043 // the first operand is the pointer to the pair/accumulator
18044 // that is being built.
18045 if (getTarget().isLittleEndian())
18046 std::reverse(Ops.begin() + 1, Ops.end());
18047 }
18048 bool Accumulate;
18049 switch (BuiltinID) {
18050 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18051 case PPC::BI__builtin_##Name: \
18052 ID = Intrinsic::ppc_##Intr; \
18053 Accumulate = Acc; \
18054 break;
18055 #include "clang/Basic/BuiltinsPPC.def"
18056 }
18057 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18058 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18059 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18060 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18061 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18062 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18063 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18064 } else {
18065 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18066 }
18067 Ops.pop_back();
18068 llvm::Function *F = CGM.getIntrinsic(ID);
18069 return Builder.CreateCall(F, Ops, "");
18070 }
18071 SmallVector<Value*, 4> CallOps;
18072 if (Accumulate) {
18073 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18074 Value *Acc = Builder.CreateLoad(Addr);
18075 CallOps.push_back(Acc);
18076 }
18077 for (unsigned i=1; i<Ops.size(); i++)
18078 CallOps.push_back(Ops[i]);
18079 llvm::Function *F = CGM.getIntrinsic(ID);
18080 Value *Call = Builder.CreateCall(F, CallOps);
18081 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
18082 }
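// Sketch of the resulting IR for an accumulating builtin (builtin name and
// types shown for illustration only):
//   __builtin_mma_xvf32gerpp(&acc, a, b);
// becomes, roughly,
//   %acc = load <512 x i1>, ptr %accp
//   %res = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc,
//                                                   <16 x i8> %a, <16 x i8> %b)
//   store <512 x i1> %res, ptr %accp, align 64
// Non-accumulating builtins skip the initial load of the accumulator.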
18083
18084 case PPC::BI__builtin_ppc_compare_and_swap:
18085 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18086 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18087 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18088 Value *OldVal = Builder.CreateLoad(OldValAddr);
18089 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18090 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18091 Value *Op2 = EmitScalarExpr(E->getArg(2));
18092 auto Pair = EmitAtomicCompareExchange(
18093 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18094 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18095 // Unlike C11's atomic_compare_exchange, according to
18096 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18097 // > In either case, the contents of the memory location specified by addr
18098 // > are copied into the memory location specified by old_val_addr.
18099 // It does not specify whether the store to OldValAddr is atomic or which
18100 // ordering to use, so, following XL's codegen, treat it as a normal
18101 // store.
18102 Value *LoadedVal = Pair.first.getScalarVal();
18103 Builder.CreateStore(LoadedVal, OldValAddr);
18104 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18105 }
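// Summary of the case above: the builtin performs a monotonic
// compare-exchange on *addr against *old_val_addr, unconditionally writes the
// loaded value back to *old_val_addr with a plain store, and returns the i1
// success flag zero-extended to i32.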
18106 case PPC::BI__builtin_ppc_fetch_and_add:
18107 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18108 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18109 llvm::AtomicOrdering::Monotonic);
18110 }
18111 case PPC::BI__builtin_ppc_fetch_and_and:
18112 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18113 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18114 llvm::AtomicOrdering::Monotonic);
18115 }
18116
18117 case PPC::BI__builtin_ppc_fetch_and_or:
18118 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18119 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18120 llvm::AtomicOrdering::Monotonic);
18121 }
18122 case PPC::BI__builtin_ppc_fetch_and_swap:
18123 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18124 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18125 llvm::AtomicOrdering::Monotonic);
18126 }
18127 case PPC::BI__builtin_ppc_ldarx:
18128 case PPC::BI__builtin_ppc_lwarx:
18129 case PPC::BI__builtin_ppc_lharx:
18130 case PPC::BI__builtin_ppc_lbarx:
18131 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18132 case PPC::BI__builtin_ppc_mfspr: {
18133 Value *Op0 = EmitScalarExpr(E->getArg(0));
18134 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18135 ? Int32Ty
18136 : Int64Ty;
18137 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18138 return Builder.CreateCall(F, {Op0});
18139 }
18140 case PPC::BI__builtin_ppc_mtspr: {
18141 Value *Op0 = EmitScalarExpr(E->getArg(0));
18142 Value *Op1 = EmitScalarExpr(E->getArg(1));
18143 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18144 ? Int32Ty
18145 : Int64Ty;
18146 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18147 return Builder.CreateCall(F, {Op0, Op1});
18148 }
18149 case PPC::BI__builtin_ppc_popcntb: {
18150 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18151 llvm::Type *ArgType = ArgValue->getType();
18152 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18153 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18154 }
18155 case PPC::BI__builtin_ppc_mtfsf: {
18156 // The builtin takes a uint32 that needs to be cast to an
18157 // f64 to be passed to the intrinsic.
18158 Value *Op0 = EmitScalarExpr(E->getArg(0));
18159 Value *Op1 = EmitScalarExpr(E->getArg(1));
18160 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18161 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18162 return Builder.CreateCall(F, {Op0, Cast}, "");
18163 }
18164
18165 case PPC::BI__builtin_ppc_swdiv_nochk:
18166 case PPC::BI__builtin_ppc_swdivs_nochk: {
18167 Value *Op0 = EmitScalarExpr(E->getArg(0));
18168 Value *Op1 = EmitScalarExpr(E->getArg(1));
18169 FastMathFlags FMF = Builder.getFastMathFlags();
18170 Builder.getFastMathFlags().setFast();
18171 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18172 Builder.getFastMathFlags() &= (FMF);
18173 return FDiv;
18174 }
18175 case PPC::BI__builtin_ppc_fric:
18176 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18177 *this, E, Intrinsic::rint,
18178 Intrinsic::experimental_constrained_rint))
18179 .getScalarVal();
18180 case PPC::BI__builtin_ppc_frim:
18181 case PPC::BI__builtin_ppc_frims:
18182 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18183 *this, E, Intrinsic::floor,
18184 Intrinsic::experimental_constrained_floor))
18185 .getScalarVal();
18186 case PPC::BI__builtin_ppc_frin:
18187 case PPC::BI__builtin_ppc_frins:
18188 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18189 *this, E, Intrinsic::round,
18190 Intrinsic::experimental_constrained_round))
18191 .getScalarVal();
18192 case PPC::BI__builtin_ppc_frip:
18193 case PPC::BI__builtin_ppc_frips:
18194 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18195 *this, E, Intrinsic::ceil,
18196 Intrinsic::experimental_constrained_ceil))
18197 .getScalarVal();
18198 case PPC::BI__builtin_ppc_friz:
18199 case PPC::BI__builtin_ppc_frizs:
18200 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18201 *this, E, Intrinsic::trunc,
18202 Intrinsic::experimental_constrained_trunc))
18203 .getScalarVal();
18204 case PPC::BI__builtin_ppc_fsqrt:
18205 case PPC::BI__builtin_ppc_fsqrts:
18206 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18207 *this, E, Intrinsic::sqrt,
18208 Intrinsic::experimental_constrained_sqrt))
18209 .getScalarVal();
18210 case PPC::BI__builtin_ppc_test_data_class: {
18211 Value *Op0 = EmitScalarExpr(E->getArg(0));
18212 Value *Op1 = EmitScalarExpr(E->getArg(1));
18213 return Builder.CreateCall(
18214 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18215 {Op0, Op1}, "test_data_class");
18216 }
18217 case PPC::BI__builtin_ppc_maxfe: {
18218 Value *Op0 = EmitScalarExpr(E->getArg(0));
18219 Value *Op1 = EmitScalarExpr(E->getArg(1));
18220 Value *Op2 = EmitScalarExpr(E->getArg(2));
18221 Value *Op3 = EmitScalarExpr(E->getArg(3));
18222 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18223 {Op0, Op1, Op2, Op3});
18224 }
18225 case PPC::BI__builtin_ppc_maxfl: {
18226 Value *Op0 = EmitScalarExpr(E->getArg(0));
18227 Value *Op1 = EmitScalarExpr(E->getArg(1));
18228 Value *Op2 = EmitScalarExpr(E->getArg(2));
18229 Value *Op3 = EmitScalarExpr(E->getArg(3));
18230 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18231 {Op0, Op1, Op2, Op3});
18232 }
18233 case PPC::BI__builtin_ppc_maxfs: {
18234 Value *Op0 = EmitScalarExpr(E->getArg(0));
18235 Value *Op1 = EmitScalarExpr(E->getArg(1));
18236 Value *Op2 = EmitScalarExpr(E->getArg(2));
18237 Value *Op3 = EmitScalarExpr(E->getArg(3));
18238 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18239 {Op0, Op1, Op2, Op3});
18240 }
18241 case PPC::BI__builtin_ppc_minfe: {
18242 Value *Op0 = EmitScalarExpr(E->getArg(0));
18243 Value *Op1 = EmitScalarExpr(E->getArg(1));
18244 Value *Op2 = EmitScalarExpr(E->getArg(2));
18245 Value *Op3 = EmitScalarExpr(E->getArg(3));
18246 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18247 {Op0, Op1, Op2, Op3});
18248 }
18249 case PPC::BI__builtin_ppc_minfl: {
18250 Value *Op0 = EmitScalarExpr(E->getArg(0));
18251 Value *Op1 = EmitScalarExpr(E->getArg(1));
18252 Value *Op2 = EmitScalarExpr(E->getArg(2));
18253 Value *Op3 = EmitScalarExpr(E->getArg(3));
18254 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18255 {Op0, Op1, Op2, Op3});
18256 }
18257 case PPC::BI__builtin_ppc_minfs: {
18258 Value *Op0 = EmitScalarExpr(E->getArg(0));
18259 Value *Op1 = EmitScalarExpr(E->getArg(1));
18260 Value *Op2 = EmitScalarExpr(E->getArg(2));
18261 Value *Op3 = EmitScalarExpr(E->getArg(3));
18262 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18263 {Op0, Op1, Op2, Op3});
18264 }
18265 case PPC::BI__builtin_ppc_swdiv:
18266 case PPC::BI__builtin_ppc_swdivs: {
18267 Value *Op0 = EmitScalarExpr(E->getArg(0));
18268 Value *Op1 = EmitScalarExpr(E->getArg(1));
18269 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18270 }
18271 case PPC::BI__builtin_ppc_set_fpscr_rn:
18272 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18273 {EmitScalarExpr(E->getArg(0))});
18274 case PPC::BI__builtin_ppc_mffs:
18275 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18276 }
18277}
18278
18279namespace {
18280// If \p E is not a null pointer, insert an address space cast to match the
18281// return type of \p E if necessary.
18282Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18283 const CallExpr *E = nullptr) {
18284 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18285 auto *Call = CGF.Builder.CreateCall(F);
18286 Call->addRetAttr(
18287 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18288 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18289 if (!E)
18290 return Call;
18291 QualType BuiltinRetType = E->getType();
18292 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18293 if (RetTy == Call->getType())
18294 return Call;
18295 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18296}
18297
18298Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18299 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18300 auto *Call = CGF.Builder.CreateCall(F);
18301 Call->addRetAttr(
18302 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18303 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18304 return Call;
18305}
18306
18307 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18308/// Emit code based on Code Object ABI version.
18309/// COV_4 : Emit code to use dispatch ptr
18310/// COV_5+ : Emit code to use implicitarg ptr
18311/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18312 /// and use its value to select the COV_4 or COV_5+ approach. It is used
18313 /// for compiling device libraries in an ABI-agnostic way.
18314 ///
18315 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18316 /// clang during compilation of user code.
18317Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18318 llvm::LoadInst *LD;
18319
18320 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18321
18322 if (Cov == CodeObjectVersionKind::COV_None) {
18323 StringRef Name = "__oclc_ABI_version";
18324 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18325 if (!ABIVersionC)
18326 ABIVersionC = new llvm::GlobalVariable(
18327 CGF.CGM.getModule(), CGF.Int32Ty, false,
18328 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18329 llvm::GlobalVariable::NotThreadLocal,
18330 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18331
18332 // This load will be eliminated by IPSCCP because it is constant
18333 // weak_odr without externally_initialized. Either changing it to weak or
18334 // adding externally_initialized will keep the load.
18335 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18336 CGF.CGM.getIntAlign());
18337
18338 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18339 ABIVersion,
18340 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18341
18342 // Indexing the implicit kernarg segment.
18343 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18344 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18345
18346 // Indexing the HSA kernel_dispatch_packet struct.
18347 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18348 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18349
18350 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18351 LD = CGF.Builder.CreateLoad(
18352 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18353 } else {
18354 Value *GEP = nullptr;
18355 if (Cov >= CodeObjectVersionKind::COV_5) {
18356 // Indexing the implicit kernarg segment.
18357 GEP = CGF.Builder.CreateConstGEP1_32(
18358 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18359 } else {
18360 // Indexing the HSA kernel_dispatch_packet struct.
18361 GEP = CGF.Builder.CreateConstGEP1_32(
18362 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18363 }
18364 LD = CGF.Builder.CreateLoad(
18365 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18366 }
18367
18368 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18369 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18370 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18371 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18372 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18373 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18374 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18375 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18376 return LD;
18377}
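// Offset summary for the code above: with the implicitarg pointer (COV_5+),
// the u16 work-group sizes for x/y/z are read at byte offsets 12, 14, and 16
// of the implicit kernarg segment; with the dispatch pointer (COV_4), they
// come from the workgroup_size_x/y/z fields of the HSA kernel_dispatch_packet
// at byte offsets 4, 6, and 8. The range metadata bounds the loaded value by
// the target's maximum OpenCL work-group size.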
18378
18379 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18380Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18381 const unsigned XOffset = 12;
18382 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18383 // Indexing the HSA kernel_dispatch_packet struct.
18384 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18385 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18386 auto *LD = CGF.Builder.CreateLoad(
18387 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18388 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18389 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18390 return LD;
18391}
18392} // namespace
18393
18394// For processing memory ordering and memory scope arguments of various
18395// amdgcn builtins.
18396 // \p Order takes a C++11-compatible memory-ordering specifier and converts
18397 // it into LLVM's memory ordering specifier using the atomic C ABI, and
18398 // writes it to \p AO. \p Scope takes a const char * and converts it into an
18399 // AMDGCN-specific SyncScopeID, which is written to \p SSID.
18400 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18401 llvm::AtomicOrdering &AO,
18402 llvm::SyncScope::ID &SSID) {
18403 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18404
18405 // Map C11/C++11 memory ordering to LLVM memory ordering
18406 assert(llvm::isValidAtomicOrderingCABI(ord));
18407 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18408 case llvm::AtomicOrderingCABI::acquire:
18409 case llvm::AtomicOrderingCABI::consume:
18410 AO = llvm::AtomicOrdering::Acquire;
18411 break;
18412 case llvm::AtomicOrderingCABI::release:
18413 AO = llvm::AtomicOrdering::Release;
18414 break;
18415 case llvm::AtomicOrderingCABI::acq_rel:
18416 AO = llvm::AtomicOrdering::AcquireRelease;
18417 break;
18418 case llvm::AtomicOrderingCABI::seq_cst:
18419 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18420 break;
18421 case llvm::AtomicOrderingCABI::relaxed:
18422 AO = llvm::AtomicOrdering::Monotonic;
18423 break;
18424 }
18425
18426 // Some of the atomic builtins take the scope as a string name.
18427 StringRef scp;
18428 if (llvm::getConstantStringInfo(Scope, scp)) {
18429 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18430 return;
18431 }
18432
18433 // Older builtins had an enum argument for the memory scope.
18434 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
18435 switch (scope) {
18436 case 0: // __MEMORY_SCOPE_SYSTEM
18437 SSID = llvm::SyncScope::System;
18438 break;
18439 case 1: // __MEMORY_SCOPE_DEVICE
18440 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
18441 break;
18442 case 2: // __MEMORY_SCOPE_WRKGRP
18443 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
18444 break;
18445 case 3: // __MEMORY_SCOPE_WVFRNT
18446 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
18447 break;
18448 case 4: // __MEMORY_SCOPE_SINGLE
18449 SSID = llvm::SyncScope::SingleThread;
18450 break;
18451 default:
18452 SSID = llvm::SyncScope::System;
18453 break;
18454 }
18455}
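// Example of the mapping above: a C11/C++11 ordering value of
// __ATOMIC_SEQ_CST becomes AtomicOrdering::SequentiallyConsistent, and a
// scope string such as "workgroup" is turned into the matching sync-scope ID;
// the older enum-style scopes (__MEMORY_SCOPE_*) are handled by the integer
// switch at the end of the function.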
18456
18457llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18458 unsigned Idx,
18459 const CallExpr *E) {
18460 llvm::Value *Arg = nullptr;
18461 if ((ICEArguments & (1 << Idx)) == 0) {
18462 Arg = EmitScalarExpr(E->getArg(Idx));
18463 } else {
18464 // If this is required to be a constant, constant fold it so that we
18465 // know that the generated intrinsic gets a ConstantInt.
18466 std::optional<llvm::APSInt> Result =
18467 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18468 assert(Result && "Expected argument to be a constant");
18469 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18470 }
18471 return Arg;
18472}
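// Usage note: ICEArguments is the bitmask filled in by
// ASTContext::GetBuiltinType, where a set bit Idx means argument Idx must be
// an integer constant expression; such arguments are constant-folded here so
// the emitted intrinsic always receives a ConstantInt (see the DPP builtins
// below for a caller).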
18473
18474Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18475 if (QT->hasFloatingRepresentation()) {
18476 switch (elementCount) {
18477 case 2:
18478 return Intrinsic::dx_dot2;
18479 case 3:
18480 return Intrinsic::dx_dot3;
18481 case 4:
18482 return Intrinsic::dx_dot4;
18483 }
18484 }
18485 if (QT->hasSignedIntegerRepresentation())
18486 return Intrinsic::dx_sdot;
18487
18488 assert(QT->hasUnsignedIntegerRepresentation());
18489 return Intrinsic::dx_udot;
18490}
18491
18492 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18493 const CallExpr *E) {
18494 if (!getLangOpts().HLSL)
18495 return nullptr;
18496
18497 switch (BuiltinID) {
18498 case Builtin::BI__builtin_hlsl_all: {
18499 Value *Op0 = EmitScalarExpr(E->getArg(0));
18500 return Builder.CreateIntrinsic(
18501 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18502 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18503 "hlsl.all");
18504 }
18505 case Builtin::BI__builtin_hlsl_any: {
18506 Value *Op0 = EmitScalarExpr(E->getArg(0));
18507 return Builder.CreateIntrinsic(
18508 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18509 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18510 "hlsl.any");
18511 }
18512 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18513 Value *OpX = EmitScalarExpr(E->getArg(0));
18514 Value *OpMin = EmitScalarExpr(E->getArg(1));
18515 Value *OpMax = EmitScalarExpr(E->getArg(2));
18516
18517 QualType Ty = E->getArg(0)->getType();
18518 bool IsUnsigned = false;
18519 if (auto *VecTy = Ty->getAs<VectorType>())
18520 Ty = VecTy->getElementType();
18521 IsUnsigned = Ty->isUnsignedIntegerType();
18522 return Builder.CreateIntrinsic(
18523 /*ReturnType=*/OpX->getType(),
18524 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18525 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18526 }
18527 case Builtin::BI__builtin_hlsl_dot: {
18528 Value *Op0 = EmitScalarExpr(E->getArg(0));
18529 Value *Op1 = EmitScalarExpr(E->getArg(1));
18530 llvm::Type *T0 = Op0->getType();
18531 llvm::Type *T1 = Op1->getType();
18532 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18533 if (T0->isFloatingPointTy())
18534 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18535
18536 if (T0->isIntegerTy())
18537 return Builder.CreateMul(Op0, Op1, "dx.dot");
18538
18539 // Bools should have been promoted
18540 llvm_unreachable(
18541 "Scalar dot product is only supported on ints and floats.");
18542 }
18543 // A VectorSplat should have happened
18544 assert(T0->isVectorTy() && T1->isVectorTy() &&
18545 "Dot product of vector and scalar is not supported.");
18546
18547 // A vector sext or sitofp should have happened
18548 assert(T0->getScalarType() == T1->getScalarType() &&
18549 "Dot product of vectors need the same element types.");
18550
18551 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18552 [[maybe_unused]] auto *VecTy1 =
18553 E->getArg(1)->getType()->getAs<VectorType>();
18554 // An HLSLVectorTruncation should have happened
18555 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18556 "Dot product requires vectors to be of the same size.");
18557
18558 return Builder.CreateIntrinsic(
18559 /*ReturnType=*/T0->getScalarType(),
18560 getDotProductIntrinsic(E->getArg(0)->getType(),
18561 VecTy0->getNumElements()),
18562 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18563 } break;
18564 case Builtin::BI__builtin_hlsl_lerp: {
18565 Value *X = EmitScalarExpr(E->getArg(0));
18566 Value *Y = EmitScalarExpr(E->getArg(1));
18567 Value *S = EmitScalarExpr(E->getArg(2));
18568 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18569 llvm_unreachable("lerp operand must have a float representation");
18570 return Builder.CreateIntrinsic(
18571 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18572 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18573 }
18574 case Builtin::BI__builtin_hlsl_length: {
18575 Value *X = EmitScalarExpr(E->getArg(0));
18576
18577 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18578 "length operand must have a float representation");
18579 // If the operand is a scalar, we can use the fabs LLVM intrinsic directly.
18580 if (!E->getArg(0)->getType()->isVectorType())
18581 return EmitFAbs(*this, X);
18582
18583 return Builder.CreateIntrinsic(
18584 /*ReturnType=*/X->getType()->getScalarType(),
18585 CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
18586 nullptr, "hlsl.length");
18587 }
18588 case Builtin::BI__builtin_hlsl_normalize: {
18589 Value *X = EmitScalarExpr(E->getArg(0));
18590
18591 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18592 "normalize operand must have a float representation");
18593
18594 return Builder.CreateIntrinsic(
18595 /*ReturnType=*/X->getType(),
18596 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
18597 nullptr, "hlsl.normalize");
18598 }
18599 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18600 Value *Op0 = EmitScalarExpr(E->getArg(0));
18601 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18602 llvm_unreachable("frac operand must have a float representation");
18603 return Builder.CreateIntrinsic(
18604 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
18605 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
18606 }
18607 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18608 Value *Op0 = EmitScalarExpr(E->getArg(0));
18609 llvm::Type *Xty = Op0->getType();
18610 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18611 if (Xty->isVectorTy()) {
18612 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18613 retType = llvm::VectorType::get(
18614 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18615 }
18616 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18617 llvm_unreachable("isinf operand must have a float representation");
18618 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18619 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18620 }
18621 case Builtin::BI__builtin_hlsl_mad: {
18622 Value *M = EmitScalarExpr(E->getArg(0));
18623 Value *A = EmitScalarExpr(E->getArg(1));
18624 Value *B = EmitScalarExpr(E->getArg(2));
18625 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18626 return Builder.CreateIntrinsic(
18627 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18628 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18629
18630 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18631 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18632 return Builder.CreateIntrinsic(
18633 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18634 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18635
18636 Value *Mul = Builder.CreateNSWMul(M, A);
18637 return Builder.CreateNSWAdd(Mul, B);
18638 }
18639 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18640 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18641 return Builder.CreateIntrinsic(
18642 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18643 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18644
18645 Value *Mul = Builder.CreateNUWMul(M, A);
18646 return Builder.CreateNUWAdd(Mul, B);
18647 }
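// Summary of the mad lowering above: floating-point operands use
// llvm.fmuladd; signed/unsigned integers use dx.imad/dx.umad when targeting
// DXIL, and otherwise expand to a plain multiply-add, e.g. (types assumed
// 32-bit for illustration)
//   %mul = mul nsw i32 %m, %a     ; nuw instead of nsw in the unsigned case
//   %res = add nsw i32 %mul, %b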
18648 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18649 Value *Op0 = EmitScalarExpr(E->getArg(0));
18650 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18651 llvm_unreachable("rcp operand must have a float representation");
18652 llvm::Type *Ty = Op0->getType();
18653 llvm::Type *EltTy = Ty->getScalarType();
18654 Constant *One = Ty->isVectorTy()
18655 ? ConstantVector::getSplat(
18656 ElementCount::getFixed(
18657 cast<FixedVectorType>(Ty)->getNumElements()),
18658 ConstantFP::get(EltTy, 1.0))
18659 : ConstantFP::get(EltTy, 1.0);
18660 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18661 }
18662 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18663 Value *Op0 = EmitScalarExpr(E->getArg(0));
18664 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18665 llvm_unreachable("rsqrt operand must have a float representation");
18666 return Builder.CreateIntrinsic(
18667 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
18668 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
18669 }
18670 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
18671 Value *Op0 = EmitScalarExpr(E->getArg(0));
18672 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18673 "saturate operand must have a float representation");
18674 return Builder.CreateIntrinsic(
18675 /*ReturnType=*/Op0->getType(),
18676 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
18677 nullptr, "hlsl.saturate");
18678 }
18679 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18680 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
18681 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18682 {}, false, true));
18683 }
18684 }
18685 return nullptr;
18686}
18687
18688void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
18689 const CallExpr *E) {
18690 constexpr const char *Tag = "amdgpu-as";
18691
18692 LLVMContext &Ctx = Inst->getContext();
18693 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
18694 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
18695 llvm::Value *V = EmitScalarExpr(E->getArg(K));
18696 StringRef AS;
18697 if (llvm::getConstantStringInfo(V, AS)) {
18698 MMRAs.push_back({Tag, AS});
18699 // TODO: Delete the resulting unused constant?
18700 continue;
18701 }
18702 CGM.Error(E->getExprLoc(),
18703 "expected an address space name as a string literal");
18704 }
18705
18706 llvm::sort(MMRAs);
18707 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
18708 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
18709}
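// Illustrative use (argument values assumed): a fence builtin call whose
// trailing arguments are the string literals "local" and "global" gets !mmra
// metadata with the tags {"amdgpu-as", "local"} and {"amdgpu-as", "global"},
// limiting the fence to those address spaces; any non-literal argument is
// diagnosed above.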
18710
18711 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18712 const CallExpr *E) {
18713 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18714 llvm::SyncScope::ID SSID;
18715 switch (BuiltinID) {
18716 case AMDGPU::BI__builtin_amdgcn_div_scale:
18717 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18718 // Translate from the intrinsic's struct return to the builtin's out
18719 // argument.
18720
18721 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18722
18723 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18724 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18725 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18726
18727 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18728 X->getType());
18729
18730 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18731
18732 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18733 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18734
18735 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18736
18737 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18738 Builder.CreateStore(FlagExt, FlagOutPtr);
18739 return Result;
18740 }
18741 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18742 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18743 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18744 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18745 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18746 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18747
18748 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18749 Src0->getType());
18750 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18751 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18752 }
18753
18754 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18755 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18756 Intrinsic::amdgcn_ds_swizzle);
18757 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18758 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18759 Intrinsic::amdgcn_mov_dpp8);
18760 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18761 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18762 llvm::SmallVector<llvm::Value *, 6> Args;
18763 // Find out if any arguments are required to be integer constant
18764 // expressions.
18765 unsigned ICEArguments = 0;
18766 ASTContext::GetBuiltinTypeError Error;
18767 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18768 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18769 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18770 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18771 }
18772 assert(Args.size() == 5 || Args.size() == 6);
18773 if (Args.size() == 5)
18774 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18775 Function *F =
18776 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18777 return Builder.CreateCall(F, Args);
18778 }
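// Note on the case above: __builtin_amdgcn_update_dpp takes an explicit
// "old" value as its first argument (six arguments total), while
// __builtin_amdgcn_mov_dpp does not (five arguments); for mov_dpp a poison
// "old" operand is prepended so that both lower to the single
// llvm.amdgcn.update.dpp intrinsic.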
18779 case AMDGPU::BI__builtin_amdgcn_permlane16:
18780 case AMDGPU::BI__builtin_amdgcn_permlanex16:
18781 return emitBuiltinWithOneOverloadedType<6>(
18782 *this, E,
18783 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
18784 ? Intrinsic::amdgcn_permlane16
18785 : Intrinsic::amdgcn_permlanex16);
18786 case AMDGPU::BI__builtin_amdgcn_permlane64:
18787 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18788 Intrinsic::amdgcn_permlane64);
18789 case AMDGPU::BI__builtin_amdgcn_readlane:
18790 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18791 Intrinsic::amdgcn_readlane);
18792 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
18793 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18794 Intrinsic::amdgcn_readfirstlane);
18795 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18796 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18797 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18798 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18799 Intrinsic::amdgcn_div_fixup);
18800 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18801 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18802 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18803 case AMDGPU::BI__builtin_amdgcn_rcp:
18804 case AMDGPU::BI__builtin_amdgcn_rcpf:
18805 case AMDGPU::BI__builtin_amdgcn_rcph:
18806 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
18807 case AMDGPU::BI__builtin_amdgcn_sqrt:
18808 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18809 case AMDGPU::BI__builtin_amdgcn_sqrth:
18810 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18811 Intrinsic::amdgcn_sqrt);
18812 case AMDGPU::BI__builtin_amdgcn_rsq:
18813 case AMDGPU::BI__builtin_amdgcn_rsqf:
18814 case AMDGPU::BI__builtin_amdgcn_rsqh:
18815 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
18816 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18817 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18818 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18819 Intrinsic::amdgcn_rsq_clamp);
18820 case AMDGPU::BI__builtin_amdgcn_sinf:
18821 case AMDGPU::BI__builtin_amdgcn_sinh:
18822 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
18823 case AMDGPU::BI__builtin_amdgcn_cosf:
18824 case AMDGPU::BI__builtin_amdgcn_cosh:
18825 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
18826 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18827 return EmitAMDGPUDispatchPtr(*this, E);
18828 case AMDGPU::BI__builtin_amdgcn_logf:
18829 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
18830 case AMDGPU::BI__builtin_amdgcn_exp2f:
18831 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18832 Intrinsic::amdgcn_exp2);
18833 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18834 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18835 Intrinsic::amdgcn_log_clamp);
18836 case AMDGPU::BI__builtin_amdgcn_ldexp:
18837 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18838 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18839 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18840 llvm::Function *F =
18841 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18842 return Builder.CreateCall(F, {Src0, Src1});
18843 }
18844 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18845 // The raw instruction has a different behavior for out of bounds exponent
18846 // values (implicit truncation instead of saturate to short_min/short_max).
18847 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18848 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18849 llvm::Function *F =
18850 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18851 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18852 }
18853 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18854 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18855 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18856 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18857 Intrinsic::amdgcn_frexp_mant);
18858 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18859 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18860 Value *Src0 = EmitScalarExpr(E->getArg(0));
18861 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18862 { Builder.getInt32Ty(), Src0->getType() });
18863 return Builder.CreateCall(F, Src0);
18864 }
18865 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18866 Value *Src0 = EmitScalarExpr(E->getArg(0));
18867 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18868 { Builder.getInt16Ty(), Src0->getType() });
18869 return Builder.CreateCall(F, Src0);
18870 }
18871 case AMDGPU::BI__builtin_amdgcn_fract:
18872 case AMDGPU::BI__builtin_amdgcn_fractf:
18873 case AMDGPU::BI__builtin_amdgcn_fracth:
18874 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18875 Intrinsic::amdgcn_fract);
18876 case AMDGPU::BI__builtin_amdgcn_lerp:
18877 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18878 Intrinsic::amdgcn_lerp);
18879 case AMDGPU::BI__builtin_amdgcn_ubfe:
18880 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18881 Intrinsic::amdgcn_ubfe);
18882 case AMDGPU::BI__builtin_amdgcn_sbfe:
18883 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18884 Intrinsic::amdgcn_sbfe);
18885 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18886 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18887 llvm::Type *ResultType = ConvertType(E->getType());
18888 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18889 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18890 return Builder.CreateCall(F, { Src });
18891 }
18892 case AMDGPU::BI__builtin_amdgcn_uicmp:
18893 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18894 case AMDGPU::BI__builtin_amdgcn_sicmp:
18895 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18896 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18897 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18898 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18899
18900 // FIXME-GFX10: How should 32 bit mask be handled?
18901 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18902 { Builder.getInt64Ty(), Src0->getType() });
18903 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18904 }
18905 case AMDGPU::BI__builtin_amdgcn_fcmp:
18906 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18907 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18908 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18909 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18910
18911 // FIXME-GFX10: How should 32 bit mask be handled?
18912 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18913 { Builder.getInt64Ty(), Src0->getType() });
18914 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18915 }
18916 case AMDGPU::BI__builtin_amdgcn_class:
18917 case AMDGPU::BI__builtin_amdgcn_classf:
18918 case AMDGPU::BI__builtin_amdgcn_classh:
18919 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18920 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18921 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18922 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18923 Intrinsic::amdgcn_fmed3);
18924 case AMDGPU::BI__builtin_amdgcn_ds_append:
18925 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18926 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18927 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18928 Value *Src0 = EmitScalarExpr(E->getArg(0));
18929 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18930 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18931 }
18932 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18933 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18934 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18935 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18936 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18937 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18938 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18939 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: {
18940
18941 Intrinsic::ID IID;
18942 switch (BuiltinID) {
18943 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18944 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18945 IID = Intrinsic::amdgcn_global_load_tr_b64;
18946 break;
18947 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18948 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18949 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18950 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18951 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18952 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
18953 IID = Intrinsic::amdgcn_global_load_tr_b128;
18954 break;
18955 }
18956 llvm::Type *LoadTy = ConvertType(E->getType());
18957 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18958 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18959 return Builder.CreateCall(F, {Addr});
18960 }
18961 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18962 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18963 {llvm::Type::getInt64Ty(getLLVMContext())});
18964 return Builder.CreateCall(F);
18965 }
18966 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18967 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18968 {llvm::Type::getInt64Ty(getLLVMContext())});
18969 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18970 return Builder.CreateCall(F, {Env});
18971 }
18972 case AMDGPU::BI__builtin_amdgcn_read_exec:
18973 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18974 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18975 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18976 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18977 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18978 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18979 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18980 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18981 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18982 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18983 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18984 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18985 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18986 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18987 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18988
18989 // The builtins take these arguments as vec4 where the last element is
18990 // ignored. The intrinsic takes them as vec3.
18991 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18992 ArrayRef<int>{0, 1, 2});
18993 RayDir =
18994 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18995 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18996 ArrayRef<int>{0, 1, 2});
18997
18998 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18999 {NodePtr->getType(), RayDir->getType()});
19000 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19001 RayInverseDir, TextureDescr});
19002 }
19003
19004 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19005 SmallVector<Value *, 4> Args;
19006 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19007 Args.push_back(EmitScalarExpr(E->getArg(i)));
19008
19009 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19010 Value *Call = Builder.CreateCall(F, Args);
19011 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19012 Value *A = Builder.CreateExtractValue(Call, 1);
19013 llvm::Type *RetTy = ConvertType(E->getType());
19014 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19015 (uint64_t)0);
19016 return Builder.CreateInsertElement(I0, A, 1);
19017 }
19018
19019 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19020 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19021 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19022 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19023 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19024 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19025 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19026 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19027 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19028 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19029 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19030 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19031 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19032 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19033 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19034 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19035 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19036 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19037 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19038 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19039 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19040 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19041 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19042 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19043 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19044 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19045 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19046 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19047 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19048 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19049 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19050 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19051 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19052 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19053 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19054 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19055 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19056 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19057 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19058 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19059 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19060 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19061 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19062 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19063 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19064 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19065 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19066 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19067 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19068 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19069 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19070 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19071 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19072 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19073 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19074 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19075 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19076 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19077 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19078 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19079
19080 // These operations perform a matrix multiplication and accumulation of
19081 // the form:
19082 // D = A * B + C
19083 // We need to specify one type for the AB matrices and one for the CD matrices.
19084 // Sparse matrix operations can have different types for A and B, as well as
19085 // an additional type for the sparsity index.
19086 // The destination type must come before the source operand types (see the illustrative note after this case block).
19087 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
19088 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
19089 // There is no need for the variable opsel argument, so always set it to
19090 // "false".
19091 bool AppendFalseForOpselArg = false;
19092 unsigned BuiltinWMMAOp;
19093
19094 switch (BuiltinID) {
19095 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19096 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19097 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19098 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19099 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19100 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
19101 break;
19102 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19103 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19104 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19105 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19106 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19107 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
19108 break;
19109 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19110 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19111 AppendFalseForOpselArg = true;
19112 [[fallthrough]];
19113 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19114 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19115 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19116 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
19117 break;
19118 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19119 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19120 AppendFalseForOpselArg = true;
19121 [[fallthrough]];
19122 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19123 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19124 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19125 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
19126 break;
19127 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19128 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19129 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19130 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
19131 break;
19132 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19133 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19134 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19135 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
19136 break;
19137 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19138 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19139 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19140 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19141 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19142 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
19143 break;
19144 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19145 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19146 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19147 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19148 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19149 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
19150 break;
19151 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19152 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19153 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19154 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
19155 break;
19156 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19157 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19158 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19159 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
19160 break;
19161 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19162 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19163 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19164 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
19165 break;
19166 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19167 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19168 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19169 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
19170 break;
19171 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19172 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19173 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19174 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
19175 break;
19176 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19177 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19178 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19179 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
19180 break;
19181 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19182 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19183 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19184 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
19185 break;
19186 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19187 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19188 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19189 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
19190 break;
19191 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19192 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19193 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19194 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
19195 break;
19196 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19197 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19198 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19199 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
19200 break;
19201 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19202 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19203 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19204 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
19205 break;
19206 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19207 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19208 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19209 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
19210 break;
19211 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19212 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19213 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19214 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
19215 break;
19216 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19217 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19218 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19219 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
19220 break;
19221 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19222 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19223 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19224 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
19225 break;
19226 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19227 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
19228 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19229 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
19230 break;
19231 }
19232
19233 SmallVector<Value *, 6> Args;
19234 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19235 Args.push_back(EmitScalarExpr(E->getArg(i)));
19236 if (AppendFalseForOpselArg)
19237 Args.push_back(Builder.getFalse());
19238
19239 SmallVector<llvm::Type *, 6> ArgTypes;
19240 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
19241 ArgTypes.push_back(Args[ArgIdx]->getType());
19242
19243 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
19244 return Builder.CreateCall(F, Args);
19245 }
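// Illustrative note (a hedged sketch, not part of the original file): for a
// GFX12 wave32 call such as
//   d = __builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12(a, b, c);
// Args becomes {a, b, c}, ArgsForMatchingMatrixTypes = {2, 0} selects the
// types of Args[2] (the CD accumulator) and Args[0] (the AB inputs) as the
// overload types, and the result is a call to an overload of
// llvm.amdgcn.wmma.f32.16x16x16.f16 on those two vector types. The concrete
// vector widths of a, b, c, and d depend on the wave size and target.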
19246
19247 // amdgcn workitem
19248 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
19249 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
19250 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
19251 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
19252 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
19253 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
19254
19255 // amdgcn workgroup size
19256 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
19257 return EmitAMDGPUWorkGroupSize(*this, 0);
19258 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
19259 return EmitAMDGPUWorkGroupSize(*this, 1);
19260 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
19261 return EmitAMDGPUWorkGroupSize(*this, 2);
19262
19263 // amdgcn grid size
19264 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19265 return EmitAMDGPUGridSize(*this, 0);
19266 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19267 return EmitAMDGPUGridSize(*this, 1);
19268 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19269 return EmitAMDGPUGridSize(*this, 2);
19270
19271 // r600 intrinsics
19272 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19273 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19274 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19275 Intrinsic::r600_recipsqrt_ieee);
19276 case AMDGPU::BI__builtin_r600_read_tidig_x:
19277 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19278 case AMDGPU::BI__builtin_r600_read_tidig_y:
19279 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19280 case AMDGPU::BI__builtin_r600_read_tidig_z:
19281 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19282 case AMDGPU::BI__builtin_amdgcn_alignbit: {
19283 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19284 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19285 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19286 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19287 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19288 }
19289 case AMDGPU::BI__builtin_amdgcn_fence: {
19290 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
19291 EmitScalarExpr(E->getArg(1)), AO, SSID);
19292 FenceInst *Fence = Builder.CreateFence(AO, SSID);
19293 if (E->getNumArgs() > 2)
19294 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
19295 return Fence;
19296 }
19297 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19298 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19299 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19300 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19301 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19302 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19303 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19304 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19305 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19306 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19307 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19308 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
19309 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
19310 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19311 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19312 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19313 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19314 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19315 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19316 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
19317 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
19318 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
19319 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
19320 llvm::AtomicRMWInst::BinOp BinOp;
19321 switch (BuiltinID) {
19322 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19323 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19324 BinOp = llvm::AtomicRMWInst::UIncWrap;
19325 break;
19326 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19327 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19328 BinOp = llvm::AtomicRMWInst::UDecWrap;
19329 break;
19330 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19331 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19332 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19333 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19334 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19335 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
19336 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
19337 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19338 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19339 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19340 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19341 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19342 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19343 BinOp = llvm::AtomicRMWInst::FAdd;
19344 break;
19345 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19346 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
19347 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
19348 BinOp = llvm::AtomicRMWInst::FMin;
19349 break;
19350 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
19351 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
19352 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19353 BinOp = llvm::AtomicRMWInst::FMax;
19354 break;
19355 }
19356
19357 Address Ptr = CheckAtomicAlignment(*this, E);
19358 Value *Val = EmitScalarExpr(E->getArg(1));
19359 llvm::Type *OrigTy = Val->getType();
19360 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19361
19362 bool Volatile;
19363
19364 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
19365 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
19366 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
19367 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
19368 Volatile =
19369 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
19370 } else {
19371 // Infer volatile from the passed type.
19372 Volatile =
19373 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19374 }
19375
19376 if (E->getNumArgs() >= 4) {
19377 // Some of the builtins have explicit ordering and scope arguments.
19378 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19379 EmitScalarExpr(E->getArg(3)), AO, SSID);
19380 } else {
19381 // Most of the builtins do not have syncscope/order arguments. For DS
19382 // atomics the scope doesn't really matter, as they implicitly operate at
19383 // workgroup scope.
19384 //
19385 // The global/flat cases need to use agent scope to consistently produce
19386 // the native instruction instead of a cmpxchg expansion.
19387 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19388 AO = AtomicOrdering::Monotonic;
19389
19390 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
19391 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19392 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19393 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
19394 llvm::Type *V2BF16Ty = FixedVectorType::get(
19395 llvm::Type::getBFloatTy(Builder.getContext()), 2);
19396 Val = Builder.CreateBitCast(Val, V2BF16Ty);
19397 }
19398 }
19399
19400 llvm::AtomicRMWInst *RMW =
19401 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19402 if (Volatile)
19403 RMW->setVolatile(true);
19404
19405 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
19406 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
19407 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
19408 // instruction for flat and global operations.
19409 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
19410 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
19411
19412 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
19413 // instruction, but this only matters for float fadd.
19414 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
19415 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
19416 }
19417
19418 return Builder.CreateBitCast(RMW, OrigTy);
19419 }
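// Illustrative note (assumed IR shape, not from the original file): a call
// such as __builtin_amdgcn_global_atomic_fadd_f32(p, v), which has no
// explicit order/scope arguments, is expected to lower to roughly
//   atomicrmw fadd ptr addrspace(1) %p, float %v syncscope("agent") monotonic
// with "amdgpu.no.fine.grained.memory" metadata attached and, because this is
// a float fadd, "amdgpu.ignore.denormal.mode" as well, so the backend selects
// the native atomic instruction instead of a cmpxchg expansion.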
19420 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19421 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19422 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19423 llvm::Type *ResultType = ConvertType(E->getType());
19424 // s_sendmsg_rtn is mangled using return type only.
19425 Function *F =
19426 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19427 return Builder.CreateCall(F, {Arg});
19428 }
19429 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
19430 return emitBuiltinWithOneOverloadedType<4>(
19431 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
19432 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
19433 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
19434 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
19435 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
19436 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
19437 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
19438 return emitBuiltinWithOneOverloadedType<5>(
19439 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
19440 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19441 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19442 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19443 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19444 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19445 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
19446 llvm::Type *RetTy = nullptr;
19447 switch (BuiltinID) {
19448 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19449 RetTy = Int8Ty;
19450 break;
19451 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19452 RetTy = Int16Ty;
19453 break;
19454 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19455 RetTy = Int32Ty;
19456 break;
19457 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19458 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
19459 break;
19460 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19461 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
19462 break;
19463 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
19464 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
19465 break;
19466 }
19467 Function *F =
19468 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
19469 return Builder.CreateCall(
19470 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
19471 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
19472 }
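// Illustrative note (assumed overload, not from the original file): e.g.
// __builtin_amdgcn_raw_buffer_load_b96(rsrc, voffset, soffset, aux) selects
// RetTy = <3 x i32> above and therefore calls the
// llvm.amdgcn.raw.ptr.buffer.load intrinsic overloaded on that type, passing
// the four builtin arguments straight through.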
19473 default:
19474 return nullptr;
19475 }
19476}
19477
19478/// Handle a SystemZ function in which the final argument is a pointer
19479/// to an int that receives the post-instruction CC value. At the LLVM level
19480 /// this is represented as a function that returns a {result, cc} pair.
19481static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19482 unsigned IntrinsicID,
19483 const CallExpr *E) {
19484 unsigned NumArgs = E->getNumArgs() - 1;
19485 SmallVector<Value *, 8> Args(NumArgs);
19486 for (unsigned I = 0; I < NumArgs; ++I)
19487 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19488 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19489 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19490 Value *Call = CGF.Builder.CreateCall(F, Args);
19491 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19492 CGF.Builder.CreateStore(CC, CCPtr);
19493 return CGF.Builder.CreateExtractValue(Call, 0);
19494}
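// Illustrative note (a hedged example, not part of the original file): for a
// source-level call such as
//   result = __builtin_s390_vpkshs(a, b, &cc);
// the helper above calls the llvm.s390.vpkshs intrinsic, which returns a
// {result, i32 cc} pair, stores element 1 of that pair through the &cc
// pointer, and returns element 0 as the value of the builtin.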
19495
19496Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19497 const CallExpr *E) {
19498 switch (BuiltinID) {
19499 case SystemZ::BI__builtin_tbegin: {
19500 Value *TDB = EmitScalarExpr(E->getArg(0));
19501 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19502 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19503 return Builder.CreateCall(F, {TDB, Control});
19504 }
19505 case SystemZ::BI__builtin_tbegin_nofloat: {
19506 Value *TDB = EmitScalarExpr(E->getArg(0));
19507 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19508 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19509 return Builder.CreateCall(F, {TDB, Control});
19510 }
19511 case SystemZ::BI__builtin_tbeginc: {
19512 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19513 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19514 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19515 return Builder.CreateCall(F, {TDB, Control});
19516 }
19517 case SystemZ::BI__builtin_tabort: {
19518 Value *Data = EmitScalarExpr(E->getArg(0));
19519 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19520 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19521 }
19522 case SystemZ::BI__builtin_non_tx_store: {
19523 Value *Address = EmitScalarExpr(E->getArg(0));
19524 Value *Data = EmitScalarExpr(E->getArg(1));
19525 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19526 return Builder.CreateCall(F, {Data, Address});
19527 }
19528
19529 // Vector builtins. Note that most vector builtins are mapped automatically
19530 // to target-specific LLVM intrinsics. The ones handled specially here can
19531 // be represented via standard LLVM IR, which is preferable to enable common
19532 // LLVM optimizations.
19533
19534 case SystemZ::BI__builtin_s390_vpopctb:
19535 case SystemZ::BI__builtin_s390_vpopcth:
19536 case SystemZ::BI__builtin_s390_vpopctf:
19537 case SystemZ::BI__builtin_s390_vpopctg: {
19538 llvm::Type *ResultType = ConvertType(E->getType());
19539 Value *X = EmitScalarExpr(E->getArg(0));
19540 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19541 return Builder.CreateCall(F, X);
19542 }
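// Illustrative note (assumed mapping, not from the original file): e.g.
// __builtin_s390_vpopctb on a <16 x i8> vector becomes a call to
// llvm.ctpop.v16i8, i.e. plain LLVM IR rather than a target-specific
// intrinsic, which is what allows the generic optimizers to reason about it.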
19543
19544 case SystemZ::BI__builtin_s390_vclzb:
19545 case SystemZ::BI__builtin_s390_vclzh:
19546 case SystemZ::BI__builtin_s390_vclzf:
19547 case SystemZ::BI__builtin_s390_vclzg: {
19548 llvm::Type *ResultType = ConvertType(E->getType());
19549 Value *X = EmitScalarExpr(E->getArg(0));
19550 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19551 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19552 return Builder.CreateCall(F, {X, Undef});
19553 }
19554
19555 case SystemZ::BI__builtin_s390_vctzb:
19556 case SystemZ::BI__builtin_s390_vctzh:
19557 case SystemZ::BI__builtin_s390_vctzf:
19558 case SystemZ::BI__builtin_s390_vctzg: {
19559 llvm::Type *ResultType = ConvertType(E->getType());
19560 Value *X = EmitScalarExpr(E->getArg(0));
19561 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19562 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19563 return Builder.CreateCall(F, {X, Undef});
19564 }
19565
19566 case SystemZ::BI__builtin_s390_verllb:
19567 case SystemZ::BI__builtin_s390_verllh:
19568 case SystemZ::BI__builtin_s390_verllf:
19569 case SystemZ::BI__builtin_s390_verllg: {
19570 llvm::Type *ResultType = ConvertType(E->getType());
19571 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19572 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19573 // Splat scalar rotate amount to vector type.
19574 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19575 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19576 Amt = Builder.CreateVectorSplat(NumElts, Amt);
19577 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19578 return Builder.CreateCall(F, { Src, Src, Amt });
19579 }
19580
19581 case SystemZ::BI__builtin_s390_verllvb:
19582 case SystemZ::BI__builtin_s390_verllvh:
19583 case SystemZ::BI__builtin_s390_verllvf:
19584 case SystemZ::BI__builtin_s390_verllvg: {
19585 llvm::Type *ResultType = ConvertType(E->getType());
19586 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19587 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19588 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19589 return Builder.CreateCall(F, { Src, Src, Amt });
19590 }
19591
19592 case SystemZ::BI__builtin_s390_vfsqsb:
19593 case SystemZ::BI__builtin_s390_vfsqdb: {
19594 llvm::Type *ResultType = ConvertType(E->getType());
19595 Value *X = EmitScalarExpr(E->getArg(0));
19596 if (Builder.getIsFPConstrained()) {
19597 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19598 return Builder.CreateConstrainedFPCall(F, { X });
19599 } else {
19600 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19601 return Builder.CreateCall(F, X);
19602 }
19603 }
19604 case SystemZ::BI__builtin_s390_vfmasb:
19605 case SystemZ::BI__builtin_s390_vfmadb: {
19606 llvm::Type *ResultType = ConvertType(E->getType());
19607 Value *X = EmitScalarExpr(E->getArg(0));
19608 Value *Y = EmitScalarExpr(E->getArg(1));
19609 Value *Z = EmitScalarExpr(E->getArg(2));
19610 if (Builder.getIsFPConstrained()) {
19611 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19612 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19613 } else {
19614 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19615 return Builder.CreateCall(F, {X, Y, Z});
19616 }
19617 }
19618 case SystemZ::BI__builtin_s390_vfmssb:
19619 case SystemZ::BI__builtin_s390_vfmsdb: {
19620 llvm::Type *ResultType = ConvertType(E->getType());
19621 Value *X = EmitScalarExpr(E->getArg(0));
19622 Value *Y = EmitScalarExpr(E->getArg(1));
19623 Value *Z = EmitScalarExpr(E->getArg(2));
19624 if (Builder.getIsFPConstrained()) {
19625 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19626 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19627 } else {
19628 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19629 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19630 }
19631 }
19632 case SystemZ::BI__builtin_s390_vfnmasb:
19633 case SystemZ::BI__builtin_s390_vfnmadb: {
19634 llvm::Type *ResultType = ConvertType(E->getType());
19635 Value *X = EmitScalarExpr(E->getArg(0));
19636 Value *Y = EmitScalarExpr(E->getArg(1));
19637 Value *Z = EmitScalarExpr(E->getArg(2));
19638 if (Builder.getIsFPConstrained()) {
19639 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19640 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19641 } else {
19642 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19643 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19644 }
19645 }
19646 case SystemZ::BI__builtin_s390_vfnmssb:
19647 case SystemZ::BI__builtin_s390_vfnmsdb: {
19648 llvm::Type *ResultType = ConvertType(E->getType());
19649 Value *X = EmitScalarExpr(E->getArg(0));
19650 Value *Y = EmitScalarExpr(E->getArg(1));
19651 Value *Z = EmitScalarExpr(E->getArg(2));
19652 if (Builder.getIsFPConstrained()) {
19653 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19654 Value *NegZ = Builder.CreateFNeg(Z, "sub");
19655 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19656 } else {
19657 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19658 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19659 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19660 }
19661 }
19662 case SystemZ::BI__builtin_s390_vflpsb:
19663 case SystemZ::BI__builtin_s390_vflpdb: {
19664 llvm::Type *ResultType = ConvertType(E->getType());
19665 Value *X = EmitScalarExpr(E->getArg(0));
19666 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19667 return Builder.CreateCall(F, X);
19668 }
19669 case SystemZ::BI__builtin_s390_vflnsb:
19670 case SystemZ::BI__builtin_s390_vflndb: {
19671 llvm::Type *ResultType = ConvertType(E->getType());
19672 Value *X = EmitScalarExpr(E->getArg(0));
19673 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19674 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19675 }
19676 case SystemZ::BI__builtin_s390_vfisb:
19677 case SystemZ::BI__builtin_s390_vfidb: {
19678 llvm::Type *ResultType = ConvertType(E->getType());
19679 Value *X = EmitScalarExpr(E->getArg(0));
19680 // Constant-fold the M4 and M5 mask arguments.
19681 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19682 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19683 // Check whether this instance can be represented via a LLVM standard
19684 // intrinsic. We only support some combinations of M4 and M5.
19685 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19686 Intrinsic::ID CI;
19687 switch (M4.getZExtValue()) {
19688 default: break;
19689 case 0: // IEEE-inexact exception allowed
19690 switch (M5.getZExtValue()) {
19691 default: break;
19692 case 0: ID = Intrinsic::rint;
19693 CI = Intrinsic::experimental_constrained_rint; break;
19694 }
19695 break;
19696 case 4: // IEEE-inexact exception suppressed
19697 switch (M5.getZExtValue()) {
19698 default: break;
19699 case 0: ID = Intrinsic::nearbyint;
19700 CI = Intrinsic::experimental_constrained_nearbyint; break;
19701 case 1: ID = Intrinsic::round;
19702 CI = Intrinsic::experimental_constrained_round; break;
19703 case 5: ID = Intrinsic::trunc;
19704 CI = Intrinsic::experimental_constrained_trunc; break;
19705 case 6: ID = Intrinsic::ceil;
19706 CI = Intrinsic::experimental_constrained_ceil; break;
19707 case 7: ID = Intrinsic::floor;
19708 CI = Intrinsic::experimental_constrained_floor; break;
19709 }
19710 break;
19711 }
19712 if (ID != Intrinsic::not_intrinsic) {
19713 if (Builder.getIsFPConstrained()) {
19714 Function *F = CGM.getIntrinsic(CI, ResultType);
19715 return Builder.CreateConstrainedFPCall(F, X);
19716 } else {
19717 Function *F = CGM.getIntrinsic(ID, ResultType);
19718 return Builder.CreateCall(F, X);
19719 }
19720 }
19721 switch (BuiltinID) { // FIXME: constrained version?
19722 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19723 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19724 default: llvm_unreachable("Unknown BuiltinID");
19725 }
19726 Function *F = CGM.getIntrinsic(ID);
19727 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19728 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19729 return Builder.CreateCall(F, {X, M4Value, M5Value});
19730 }
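// Illustrative note (assumed values, not from the original file): e.g.
// M4 = 4 (IEEE-inexact suppressed) with M5 = 5 maps to llvm.trunc, or to
// llvm.experimental.constrained.trunc under strict FP; any M4/M5 combination
// not listed above falls through to the target-specific s390.vfisb/s390.vfidb
// intrinsic with the two masks passed as constant operands.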
19731 case SystemZ::BI__builtin_s390_vfmaxsb:
19732 case SystemZ::BI__builtin_s390_vfmaxdb: {
19733 llvm::Type *ResultType = ConvertType(E->getType());
19734 Value *X = EmitScalarExpr(E->getArg(0));
19735 Value *Y = EmitScalarExpr(E->getArg(1));
19736 // Constant-fold the M4 mask argument.
19737 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19738 // Check whether this instance can be represented via a LLVM standard
19739 // intrinsic. We only support some values of M4.
19740 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19741 Intrinsic::ID CI;
19742 switch (M4.getZExtValue()) {
19743 default: break;
19744 case 4: ID = Intrinsic::maxnum;
19745 CI = Intrinsic::experimental_constrained_maxnum; break;
19746 }
19747 if (ID != Intrinsic::not_intrinsic) {
19748 if (Builder.getIsFPConstrained()) {
19749 Function *F = CGM.getIntrinsic(CI, ResultType);
19750 return Builder.CreateConstrainedFPCall(F, {X, Y});
19751 } else {
19752 Function *F = CGM.getIntrinsic(ID, ResultType);
19753 return Builder.CreateCall(F, {X, Y});
19754 }
19755 }
19756 switch (BuiltinID) {
19757 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19758 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19759 default: llvm_unreachable("Unknown BuiltinID");
19760 }
19761 Function *F = CGM.getIntrinsic(ID);
19762 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19763 return Builder.CreateCall(F, {X, Y, M4Value});
19764 }
19765 case SystemZ::BI__builtin_s390_vfminsb:
19766 case SystemZ::BI__builtin_s390_vfmindb: {
19767 llvm::Type *ResultType = ConvertType(E->getType());
19768 Value *X = EmitScalarExpr(E->getArg(0));
19769 Value *Y = EmitScalarExpr(E->getArg(1));
19770 // Constant-fold the M4 mask argument.
19771 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19772 // Check whether this instance can be represented via a LLVM standard
19773 // intrinsic. We only support some values of M4.
19774 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19775 Intrinsic::ID CI;
19776 switch (M4.getZExtValue()) {
19777 default: break;
19778 case 4: ID = Intrinsic::minnum;
19779 CI = Intrinsic::experimental_constrained_minnum; break;
19780 }
19781 if (ID != Intrinsic::not_intrinsic) {
19782 if (Builder.getIsFPConstrained()) {
19783 Function *F = CGM.getIntrinsic(CI, ResultType);
19784 return Builder.CreateConstrainedFPCall(F, {X, Y});
19785 } else {
19786 Function *F = CGM.getIntrinsic(ID, ResultType);
19787 return Builder.CreateCall(F, {X, Y});
19788 }
19789 }
19790 switch (BuiltinID) {
19791 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19792 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19793 default: llvm_unreachable("Unknown BuiltinID");
19794 }
19795 Function *F = CGM.getIntrinsic(ID);
19796 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19797 return Builder.CreateCall(F, {X, Y, M4Value});
19798 }
19799
19800 case SystemZ::BI__builtin_s390_vlbrh:
19801 case SystemZ::BI__builtin_s390_vlbrf:
19802 case SystemZ::BI__builtin_s390_vlbrg: {
19803 llvm::Type *ResultType = ConvertType(E->getType());
19804 Value *X = EmitScalarExpr(E->getArg(0));
19805 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19806 return Builder.CreateCall(F, X);
19807 }
19808
19809 // Vector intrinsics that output the post-instruction CC value.
19810
19811#define INTRINSIC_WITH_CC(NAME) \
19812 case SystemZ::BI__builtin_##NAME: \
19813 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
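// For illustration, INTRINSIC_WITH_CC(s390_vpkshs) expands to:
//   case SystemZ::BI__builtin_s390_vpkshs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);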
19814
19815 INTRINSIC_WITH_CC(s390_vpkshs);
19816 INTRINSIC_WITH_CC(s390_vpksfs);
19817 INTRINSIC_WITH_CC(s390_vpksgs);
19818
19819 INTRINSIC_WITH_CC(s390_vpklshs);
19820 INTRINSIC_WITH_CC(s390_vpklsfs);
19821 INTRINSIC_WITH_CC(s390_vpklsgs);
19822
19823 INTRINSIC_WITH_CC(s390_vceqbs);
19824 INTRINSIC_WITH_CC(s390_vceqhs);
19825 INTRINSIC_WITH_CC(s390_vceqfs);
19826 INTRINSIC_WITH_CC(s390_vceqgs);
19827
19828 INTRINSIC_WITH_CC(s390_vchbs);
19829 INTRINSIC_WITH_CC(s390_vchhs);
19830 INTRINSIC_WITH_CC(s390_vchfs);
19831 INTRINSIC_WITH_CC(s390_vchgs);
19832
19833 INTRINSIC_WITH_CC(s390_vchlbs);
19834 INTRINSIC_WITH_CC(s390_vchlhs);
19835 INTRINSIC_WITH_CC(s390_vchlfs);
19836 INTRINSIC_WITH_CC(s390_vchlgs);
19837
19838 INTRINSIC_WITH_CC(s390_vfaebs);
19839 INTRINSIC_WITH_CC(s390_vfaehs);
19840 INTRINSIC_WITH_CC(s390_vfaefs);
19841
19842 INTRINSIC_WITH_CC(s390_vfaezbs);
19843 INTRINSIC_WITH_CC(s390_vfaezhs);
19844 INTRINSIC_WITH_CC(s390_vfaezfs);
19845
19846 INTRINSIC_WITH_CC(s390_vfeebs);
19847 INTRINSIC_WITH_CC(s390_vfeehs);
19848 INTRINSIC_WITH_CC(s390_vfeefs);
19849
19850 INTRINSIC_WITH_CC(s390_vfeezbs);
19851 INTRINSIC_WITH_CC(s390_vfeezhs);
19852 INTRINSIC_WITH_CC(s390_vfeezfs);
19853
19854 INTRINSIC_WITH_CC(s390_vfenebs);
19855 INTRINSIC_WITH_CC(s390_vfenehs);
19856 INTRINSIC_WITH_CC(s390_vfenefs);
19857
19858 INTRINSIC_WITH_CC(s390_vfenezbs);
19859 INTRINSIC_WITH_CC(s390_vfenezhs);
19860 INTRINSIC_WITH_CC(s390_vfenezfs);
19861
19862 INTRINSIC_WITH_CC(s390_vistrbs);
19863 INTRINSIC_WITH_CC(s390_vistrhs);
19864 INTRINSIC_WITH_CC(s390_vistrfs);
19865
19866 INTRINSIC_WITH_CC(s390_vstrcbs);
19867 INTRINSIC_WITH_CC(s390_vstrchs);
19868 INTRINSIC_WITH_CC(s390_vstrcfs);
19869
19870 INTRINSIC_WITH_CC(s390_vstrczbs);
19871 INTRINSIC_WITH_CC(s390_vstrczhs);
19872 INTRINSIC_WITH_CC(s390_vstrczfs);
19873
19874 INTRINSIC_WITH_CC(s390_vfcesbs);
19875 INTRINSIC_WITH_CC(s390_vfcedbs);
19876 INTRINSIC_WITH_CC(s390_vfchsbs);
19877 INTRINSIC_WITH_CC(s390_vfchdbs);
19878 INTRINSIC_WITH_CC(s390_vfchesbs);
19879 INTRINSIC_WITH_CC(s390_vfchedbs);
19880
19881 INTRINSIC_WITH_CC(s390_vftcisb);
19882 INTRINSIC_WITH_CC(s390_vftcidb);
19883
19884 INTRINSIC_WITH_CC(s390_vstrsb);
19885 INTRINSIC_WITH_CC(s390_vstrsh);
19886 INTRINSIC_WITH_CC(s390_vstrsf);
19887
19888 INTRINSIC_WITH_CC(s390_vstrszb);
19889 INTRINSIC_WITH_CC(s390_vstrszh);
19890 INTRINSIC_WITH_CC(s390_vstrszf);
19891
19892#undef INTRINSIC_WITH_CC
19893
19894 default:
19895 return nullptr;
19896 }
19897}
19898
19899namespace {
19900// Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
19901struct NVPTXMmaLdstInfo {
19902 unsigned NumResults; // Number of elements to load/store
19903 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
19904 unsigned IID_col;
19905 unsigned IID_row;
19906};
19907
19908#define MMA_INTR(geom_op_type, layout) \
19909 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19910#define MMA_LDST(n, geom_op_type) \
19911 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
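// For illustration, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride },
// i.e. eight result elements plus the col- and row-major intrinsic IDs.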
19912
19913static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19914 switch (BuiltinID) {
19915 // FP MMA loads
19916 case NVPTX::BI__hmma_m16n16k16_ld_a:
19917 return MMA_LDST(8, m16n16k16_load_a_f16);
19918 case NVPTX::BI__hmma_m16n16k16_ld_b:
19919 return MMA_LDST(8, m16n16k16_load_b_f16);
19920 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19921 return MMA_LDST(4, m16n16k16_load_c_f16);
19922 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19923 return MMA_LDST(8, m16n16k16_load_c_f32);
19924 case NVPTX::BI__hmma_m32n8k16_ld_a:
19925 return MMA_LDST(8, m32n8k16_load_a_f16);
19926 case NVPTX::BI__hmma_m32n8k16_ld_b:
19927 return MMA_LDST(8, m32n8k16_load_b_f16);
19928 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19929 return MMA_LDST(4, m32n8k16_load_c_f16);
19930 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19931 return MMA_LDST(8, m32n8k16_load_c_f32);
19932 case NVPTX::BI__hmma_m8n32k16_ld_a:
19933 return MMA_LDST(8, m8n32k16_load_a_f16);
19934 case NVPTX::BI__hmma_m8n32k16_ld_b:
19935 return MMA_LDST(8, m8n32k16_load_b_f16);
19936 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19937 return MMA_LDST(4, m8n32k16_load_c_f16);
19938 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19939 return MMA_LDST(8, m8n32k16_load_c_f32);
19940
19941 // Integer MMA loads
19942 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19943 return MMA_LDST(2, m16n16k16_load_a_s8);
19944 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19945 return MMA_LDST(2, m16n16k16_load_a_u8);
19946 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19947 return MMA_LDST(2, m16n16k16_load_b_s8);
19948 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19949 return MMA_LDST(2, m16n16k16_load_b_u8);
19950 case NVPTX::BI__imma_m16n16k16_ld_c:
19951 return MMA_LDST(8, m16n16k16_load_c_s32);
19952 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19953 return MMA_LDST(4, m32n8k16_load_a_s8);
19954 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19955 return MMA_LDST(4, m32n8k16_load_a_u8);
19956 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19957 return MMA_LDST(1, m32n8k16_load_b_s8);
19958 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19959 return MMA_LDST(1, m32n8k16_load_b_u8);
19960 case NVPTX::BI__imma_m32n8k16_ld_c:
19961 return MMA_LDST(8, m32n8k16_load_c_s32);
19962 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19963 return MMA_LDST(1, m8n32k16_load_a_s8);
19964 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19965 return MMA_LDST(1, m8n32k16_load_a_u8);
19966 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19967 return MMA_LDST(4, m8n32k16_load_b_s8);
19968 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19969 return MMA_LDST(4, m8n32k16_load_b_u8);
19970 case NVPTX::BI__imma_m8n32k16_ld_c:
19971 return MMA_LDST(8, m8n32k16_load_c_s32);
19972
19973 // Sub-integer MMA loads.
19974 // Only row/col layout is supported by A/B fragments.
19975 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19976 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19977 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19978 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19979 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19980 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19981 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19982 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19983 case NVPTX::BI__imma_m8n8k32_ld_c:
19984 return MMA_LDST(2, m8n8k32_load_c_s32);
19985 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19986 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19987 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19988 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19989 case NVPTX::BI__bmma_m8n8k128_ld_c:
19990 return MMA_LDST(2, m8n8k128_load_c_s32);
19991
19992 // Double MMA loads
19993 case NVPTX::BI__dmma_m8n8k4_ld_a:
19994 return MMA_LDST(1, m8n8k4_load_a_f64);
19995 case NVPTX::BI__dmma_m8n8k4_ld_b:
19996 return MMA_LDST(1, m8n8k4_load_b_f64);
19997 case NVPTX::BI__dmma_m8n8k4_ld_c:
19998 return MMA_LDST(2, m8n8k4_load_c_f64);
19999
20000 // Alternate float MMA loads
20001 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20002 return MMA_LDST(4, m16n16k16_load_a_bf16);
20003 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20004 return MMA_LDST(4, m16n16k16_load_b_bf16);
20005 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20006 return MMA_LDST(2, m8n32k16_load_a_bf16);
20007 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20008 return MMA_LDST(8, m8n32k16_load_b_bf16);
20009 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20010 return MMA_LDST(8, m32n8k16_load_a_bf16);
20011 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20012 return MMA_LDST(2, m32n8k16_load_b_bf16);
20013 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20014 return MMA_LDST(4, m16n16k8_load_a_tf32);
20015 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20016 return MMA_LDST(4, m16n16k8_load_b_tf32);
20017 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20018 return MMA_LDST(8, m16n16k8_load_c_f32);
20019
20020 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
20021 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
20022 // use fragment C for both loads and stores.
20023 // FP MMA stores.
20024 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20025 return MMA_LDST(4, m16n16k16_store_d_f16);
20026 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20027 return MMA_LDST(8, m16n16k16_store_d_f32);
20028 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20029 return MMA_LDST(4, m32n8k16_store_d_f16);
20030 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20031 return MMA_LDST(8, m32n8k16_store_d_f32);
20032 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20033 return MMA_LDST(4, m8n32k16_store_d_f16);
20034 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20035 return MMA_LDST(8, m8n32k16_store_d_f32);
20036
20037 // Integer and sub-integer MMA stores.
20038 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20039 // name, integer loads/stores use LLVM's i32.
20040 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20041 return MMA_LDST(8, m16n16k16_store_d_s32);
20042 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20043 return MMA_LDST(8, m32n8k16_store_d_s32);
20044 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20045 return MMA_LDST(8, m8n32k16_store_d_s32);
20046 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20047 return MMA_LDST(2, m8n8k32_store_d_s32);
20048 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20049 return MMA_LDST(2, m8n8k128_store_d_s32);
20050
20051 // Double MMA store
20052 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20053 return MMA_LDST(2, m8n8k4_store_d_f64);
20054
20055 // Alternate float MMA store
20056 case NVPTX::BI__mma_m16n16k8_st_c_f32:
20057 return MMA_LDST(8, m16n16k8_store_d_f32);
20058
20059 default:
20060 llvm_unreachable("Unknown MMA builtin");
20061 }
20062}
20063#undef MMA_LDST
20064#undef MMA_INTR
20065
20066
20067struct NVPTXMmaInfo {
20068 unsigned NumEltsA;
20069 unsigned NumEltsB;
20070 unsigned NumEltsC;
20071 unsigned NumEltsD;
20072
20073 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
20074 // over 'col' for layout. The index of non-satf variants is expected to match
20075 // the undocumented layout constants used by CUDA's mma.hpp.
20076 std::array<unsigned, 8> Variants;
20077
20078 unsigned getMMAIntrinsic(int Layout, bool Satf) {
20079 unsigned Index = Layout + 4 * Satf;
20080 if (Index >= Variants.size())
20081 return 0;
20082 return Variants[Index];
20083 }
20084};
20085
20086 // Returns an intrinsic that matches Layout and Satf for valid combinations of
20087 // Layout and Satf, 0 otherwise.
20088static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
20089 // clang-format off
20090#define MMA_VARIANTS(geom, type) \
20091 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
20092 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
20093 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
20094 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
20095#define MMA_SATF_VARIANTS(geom, type) \
20096 MMA_VARIANTS(geom, type), \
20097 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
20098 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
20099 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
20100 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
20101// Sub-integer MMA only supports row.col layout.
20102#define MMA_VARIANTS_I4(geom, type) \
20103 0, \
20104 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
20105 0, \
20106 0, \
20107 0, \
20108 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
20109 0, \
20110 0
20111// b1 MMA does not support .satfinite.
20112#define MMA_VARIANTS_B1_XOR(geom, type) \
20113 0, \
20114 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
20115 0, \
20116 0, \
20117 0, \
20118 0, \
20119 0, \
20120 0
20121#define MMA_VARIANTS_B1_AND(geom, type) \
20122 0, \
20123 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
20124 0, \
20125 0, \
20126 0, \
20127 0, \
20128 0, \
20129 0
20130 // clang-format on
20131 switch (BuiltinID) {
20132 // FP MMA
20133 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
20134 // the NumEltsN fields of the return value are ordered as A, B, C, D.
20135 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20136 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
20137 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20138 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
20139 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20140 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
20141 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20142 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
20143 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20144 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
20145 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20146 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
20147 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20148 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
20149 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20150 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
20151 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20152 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
20153 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20154 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
20155 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20156 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
20157 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20158 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
20159
20160 // Integer MMA
20161 case NVPTX::BI__imma_m16n16k16_mma_s8:
20162 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
20163 case NVPTX::BI__imma_m16n16k16_mma_u8:
20164 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
20165 case NVPTX::BI__imma_m32n8k16_mma_s8:
20166 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
20167 case NVPTX::BI__imma_m32n8k16_mma_u8:
20168 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
20169 case NVPTX::BI__imma_m8n32k16_mma_s8:
20170 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
20171 case NVPTX::BI__imma_m8n32k16_mma_u8:
20172 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
20173
20174 // Sub-integer MMA
20175 case NVPTX::BI__imma_m8n8k32_mma_s4:
20176 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
20177 case NVPTX::BI__imma_m8n8k32_mma_u4:
20178 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
20179 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20180 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
20181 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20182 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
20183
20184 // Double MMA
20185 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20186 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
20187
20188 // Alternate FP MMA
20189 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20190 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
20191 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20192 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
20193 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20194 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
20195 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
20196 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
20197 default:
20198 llvm_unreachable("Unexpected builtin ID.");
20199 }
20200#undef MMA_VARIANTS
20201#undef MMA_SATF_VARIANTS
20202#undef MMA_VARIANTS_I4
20203#undef MMA_VARIANTS_B1_AND
20204#undef MMA_VARIANTS_B1_XOR
20205}
20206
20207static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
20208 const CallExpr *E) {
20209 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20210 QualType ArgType = E->getArg(0)->getType();
20211 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
20212 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
20213 return CGF.Builder.CreateCall(
20214 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20215 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
20216}
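// Illustrative note (assumed overload mangling, not from the original file):
// for __nvvm_ldg_i2 on an int2 pointer, ElemTy is <2 x i32> and the natural
// pointee alignment is 8 (2 * alignof(int)), so this emits roughly
//   call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr %p, i32 8)
// consistent with the PTX interoperability alignment rule quoted at the
// __nvvm_ldg_* cases below.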
20217
20218static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
20219 const CallExpr *E) {
20220 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20221 llvm::Type *ElemTy =
20222 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20223 return CGF.Builder.CreateCall(
20224 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20225 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
20226}
20227
20228static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
20229 CodeGenFunction &CGF, const CallExpr *E,
20230 int SrcSize) {
20231 return E->getNumArgs() == 3
20232 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
20233 {CGF.EmitScalarExpr(E->getArg(0)),
20234 CGF.EmitScalarExpr(E->getArg(1)),
20235 CGF.EmitScalarExpr(E->getArg(2))})
20236 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
20237 {CGF.EmitScalarExpr(E->getArg(0)),
20238 CGF.EmitScalarExpr(E->getArg(1))});
20239}
20240
20241static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
20242 const CallExpr *E, CodeGenFunction &CGF) {
20243 auto &C = CGF.CGM.getContext();
20244 if (!(C.getLangOpts().NativeHalfType ||
20245 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
20246 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
20247 " requires native half type support.");
20248 return nullptr;
20249 }
20250
20251 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
20252 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20253 return MakeLdgLdu(IntrinsicID, CGF, E);
20254
20255 SmallVector<llvm::Value *, 16> Args;
20256 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
20257 auto *FTy = F->getFunctionType();
20258 unsigned ICEArguments = 0;
20259 ASTContext::GetBuiltinTypeError Error;
20260 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
20261 assert(Error == ASTContext::GE_None && "Should not codegen an error");
20262 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
20263 assert((ICEArguments & (1 << i)) == 0);
20264 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
20265 auto *PTy = FTy->getParamType(i);
20266 if (PTy != ArgValue->getType())
20267 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
20268 Args.push_back(ArgValue);
20269 }
20270
20271 return CGF.Builder.CreateCall(F, Args);
20272}
20273} // namespace
20274
20275Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
20276 const CallExpr *E) {
20277 switch (BuiltinID) {
20278 case NVPTX::BI__nvvm_atom_add_gen_i:
20279 case NVPTX::BI__nvvm_atom_add_gen_l:
20280 case NVPTX::BI__nvvm_atom_add_gen_ll:
20281 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
20282
20283 case NVPTX::BI__nvvm_atom_sub_gen_i:
20284 case NVPTX::BI__nvvm_atom_sub_gen_l:
20285 case NVPTX::BI__nvvm_atom_sub_gen_ll:
20286 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
20287
20288 case NVPTX::BI__nvvm_atom_and_gen_i:
20289 case NVPTX::BI__nvvm_atom_and_gen_l:
20290 case NVPTX::BI__nvvm_atom_and_gen_ll:
20291 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
20292
20293 case NVPTX::BI__nvvm_atom_or_gen_i:
20294 case NVPTX::BI__nvvm_atom_or_gen_l:
20295 case NVPTX::BI__nvvm_atom_or_gen_ll:
20296 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
20297
20298 case NVPTX::BI__nvvm_atom_xor_gen_i:
20299 case NVPTX::BI__nvvm_atom_xor_gen_l:
20300 case NVPTX::BI__nvvm_atom_xor_gen_ll:
20301 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
20302
20303 case NVPTX::BI__nvvm_atom_xchg_gen_i:
20304 case NVPTX::BI__nvvm_atom_xchg_gen_l:
20305 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
20306 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
20307
20308 case NVPTX::BI__nvvm_atom_max_gen_i:
20309 case NVPTX::BI__nvvm_atom_max_gen_l:
20310 case NVPTX::BI__nvvm_atom_max_gen_ll:
20311 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
20312
20313 case NVPTX::BI__nvvm_atom_max_gen_ui:
20314 case NVPTX::BI__nvvm_atom_max_gen_ul:
20315 case NVPTX::BI__nvvm_atom_max_gen_ull:
20316 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
20317
20318 case NVPTX::BI__nvvm_atom_min_gen_i:
20319 case NVPTX::BI__nvvm_atom_min_gen_l:
20320 case NVPTX::BI__nvvm_atom_min_gen_ll:
20321 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
20322
20323 case NVPTX::BI__nvvm_atom_min_gen_ui:
20324 case NVPTX::BI__nvvm_atom_min_gen_ul:
20325 case NVPTX::BI__nvvm_atom_min_gen_ull:
20326 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
20327
20328 case NVPTX::BI__nvvm_atom_cas_gen_i:
20329 case NVPTX::BI__nvvm_atom_cas_gen_l:
20330 case NVPTX::BI__nvvm_atom_cas_gen_ll:
20331 // __nvvm_atom_cas_gen_* should return the old value rather than the
20332 // success flag.
20333 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
20334
20335 case NVPTX::BI__nvvm_atom_add_gen_f:
20336 case NVPTX::BI__nvvm_atom_add_gen_d: {
20337 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
20338 Value *Val = EmitScalarExpr(E->getArg(1));
20339
20340 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
20341 AtomicOrdering::SequentiallyConsistent);
20342 }
20343
20344 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
20345 Value *Ptr = EmitScalarExpr(E->getArg(0));
20346 Value *Val = EmitScalarExpr(E->getArg(1));
20347 Function *FnALI32 =
20348 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
20349 return Builder.CreateCall(FnALI32, {Ptr, Val});
20350 }
20351
20352 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
20353 Value *Ptr = EmitScalarExpr(E->getArg(0));
20354 Value *Val = EmitScalarExpr(E->getArg(1));
20355 Function *FnALD32 =
20356 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
20357 return Builder.CreateCall(FnALD32, {Ptr, Val});
20358 }
20359
20360 case NVPTX::BI__nvvm_ldg_c:
20361 case NVPTX::BI__nvvm_ldg_sc:
20362 case NVPTX::BI__nvvm_ldg_c2:
20363 case NVPTX::BI__nvvm_ldg_sc2:
20364 case NVPTX::BI__nvvm_ldg_c4:
20365 case NVPTX::BI__nvvm_ldg_sc4:
20366 case NVPTX::BI__nvvm_ldg_s:
20367 case NVPTX::BI__nvvm_ldg_s2:
20368 case NVPTX::BI__nvvm_ldg_s4:
20369 case NVPTX::BI__nvvm_ldg_i:
20370 case NVPTX::BI__nvvm_ldg_i2:
20371 case NVPTX::BI__nvvm_ldg_i4:
20372 case NVPTX::BI__nvvm_ldg_l:
20373 case NVPTX::BI__nvvm_ldg_l2:
20374 case NVPTX::BI__nvvm_ldg_ll:
20375 case NVPTX::BI__nvvm_ldg_ll2:
20376 case NVPTX::BI__nvvm_ldg_uc:
20377 case NVPTX::BI__nvvm_ldg_uc2:
20378 case NVPTX::BI__nvvm_ldg_uc4:
20379 case NVPTX::BI__nvvm_ldg_us:
20380 case NVPTX::BI__nvvm_ldg_us2:
20381 case NVPTX::BI__nvvm_ldg_us4:
20382 case NVPTX::BI__nvvm_ldg_ui:
20383 case NVPTX::BI__nvvm_ldg_ui2:
20384 case NVPTX::BI__nvvm_ldg_ui4:
20385 case NVPTX::BI__nvvm_ldg_ul:
20386 case NVPTX::BI__nvvm_ldg_ul2:
20387 case NVPTX::BI__nvvm_ldg_ull:
20388 case NVPTX::BI__nvvm_ldg_ull2:
20389 // PTX Interoperability section 2.2: "For a vector with an even number of
20390 // elements, its alignment is set to number of elements times the alignment
20391 // of its member: n*alignof(t)."
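// For example (illustrative), __nvvm_ldg_uc4 loads a <4 x i8> element through
// llvm.nvvm.ldg.global.i with the 4-byte natural alignment implied by that
// rule.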
20392 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
20393 case NVPTX::BI__nvvm_ldg_f:
20394 case NVPTX::BI__nvvm_ldg_f2:
20395 case NVPTX::BI__nvvm_ldg_f4:
20396 case NVPTX::BI__nvvm_ldg_d:
20397 case NVPTX::BI__nvvm_ldg_d2:
20398 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20399
20400 case NVPTX::BI__nvvm_ldu_c:
20401 case NVPTX::BI__nvvm_ldu_sc:
20402 case NVPTX::BI__nvvm_ldu_c2:
20403 case NVPTX::BI__nvvm_ldu_sc2:
20404 case NVPTX::BI__nvvm_ldu_c4:
20405 case NVPTX::BI__nvvm_ldu_sc4:
20406 case NVPTX::BI__nvvm_ldu_s:
20407 case NVPTX::BI__nvvm_ldu_s2:
20408 case NVPTX::BI__nvvm_ldu_s4:
20409 case NVPTX::BI__nvvm_ldu_i:
20410 case NVPTX::BI__nvvm_ldu_i2:
20411 case NVPTX::BI__nvvm_ldu_i4:
20412 case NVPTX::BI__nvvm_ldu_l:
20413 case NVPTX::BI__nvvm_ldu_l2:
20414 case NVPTX::BI__nvvm_ldu_ll:
20415 case NVPTX::BI__nvvm_ldu_ll2:
20416 case NVPTX::BI__nvvm_ldu_uc:
20417 case NVPTX::BI__nvvm_ldu_uc2:
20418 case NVPTX::BI__nvvm_ldu_uc4:
20419 case NVPTX::BI__nvvm_ldu_us:
20420 case NVPTX::BI__nvvm_ldu_us2:
20421 case NVPTX::BI__nvvm_ldu_us4:
20422 case NVPTX::BI__nvvm_ldu_ui:
20423 case NVPTX::BI__nvvm_ldu_ui2:
20424 case NVPTX::BI__nvvm_ldu_ui4:
20425 case NVPTX::BI__nvvm_ldu_ul:
20426 case NVPTX::BI__nvvm_ldu_ul2:
20427 case NVPTX::BI__nvvm_ldu_ull:
20428 case NVPTX::BI__nvvm_ldu_ull2:
20429 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20430 case NVPTX::BI__nvvm_ldu_f:
20431 case NVPTX::BI__nvvm_ldu_f2:
20432 case NVPTX::BI__nvvm_ldu_f4:
20433 case NVPTX::BI__nvvm_ldu_d:
20434 case NVPTX::BI__nvvm_ldu_d2:
20435 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20436
20437 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20438 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20439 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20440 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20441 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20442 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20443 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20444 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20445 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20446 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20447 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20448 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20449 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20450 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20451 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20452 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20453 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20454 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20455 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20456 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20457 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20458 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20459 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20460 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20461 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20462 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20463 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20464 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20465 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20466 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20467 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20468 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20469 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20470 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20471 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20472 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20473 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20474 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20475 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20476 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20477 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20478 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20479 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20480 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20481 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20482 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20483 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20484 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20485 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20486 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20487 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20488 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20489 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20490 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20491 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20492 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20493 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20494 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20495 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20496 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20497 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20498 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20499 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20500 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20501 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20502 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20503 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20504 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20505 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20506 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20507 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20508 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20509 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20510 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20511 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20512 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20513 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20514 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20515 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20516 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20517 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20518 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20519 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20520 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20521 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20522 Value *Ptr = EmitScalarExpr(E->getArg(0));
20523 llvm::Type *ElemTy =
20524 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20525 return Builder.CreateCall(
20526 CGM.getIntrinsic(
20527 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20528 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20529 }
20530 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20531 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20532 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20533 Value *Ptr = EmitScalarExpr(E->getArg(0));
20534 llvm::Type *ElemTy =
20535 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20536 return Builder.CreateCall(
20537 CGM.getIntrinsic(
20538 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20539 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20540 }
20541 case NVPTX::BI__nvvm_match_all_sync_i32p:
20542 case NVPTX::BI__nvvm_match_all_sync_i64p: {
20543 Value *Mask = EmitScalarExpr(E->getArg(0));
20544 Value *Val = EmitScalarExpr(E->getArg(1));
20545 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
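// The intrinsic returns a {value, i1 predicate} pair: the predicate is
// zero-extended and stored through the third pointer argument, and the
// matched value is returned to the caller.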
20546 Value *ResultPair = Builder.CreateCall(
20547 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20548 ? Intrinsic::nvvm_match_all_sync_i32p
20549 : Intrinsic::nvvm_match_all_sync_i64p),
20550 {Mask, Val});
20551 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
20552 PredOutPtr.getElementType());
20553 Builder.CreateStore(Pred, PredOutPtr);
20554 return Builder.CreateExtractValue(ResultPair, 0);
20555 }
20556
20557 // FP MMA loads
20558 case NVPTX::BI__hmma_m16n16k16_ld_a:
20559 case NVPTX::BI__hmma_m16n16k16_ld_b:
20560 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20561 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20562 case NVPTX::BI__hmma_m32n8k16_ld_a:
20563 case NVPTX::BI__hmma_m32n8k16_ld_b:
20564 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20565 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20566 case NVPTX::BI__hmma_m8n32k16_ld_a:
20567 case NVPTX::BI__hmma_m8n32k16_ld_b:
20568 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20569 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20570 // Integer MMA loads.
20571 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20572 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20573 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20574 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20575 case NVPTX::BI__imma_m16n16k16_ld_c:
20576 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20577 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20578 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20579 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20580 case NVPTX::BI__imma_m32n8k16_ld_c:
20581 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20582 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20583 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20584 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20585 case NVPTX::BI__imma_m8n32k16_ld_c:
20586 // Sub-integer MMA loads.
20587 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20588 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20589 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20590 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20591 case NVPTX::BI__imma_m8n8k32_ld_c:
20592 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20593 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20594 case NVPTX::BI__bmma_m8n8k128_ld_c:
20595 // Double MMA loads.
20596 case NVPTX::BI__dmma_m8n8k4_ld_a:
20597 case NVPTX::BI__dmma_m8n8k4_ld_b:
20598 case NVPTX::BI__dmma_m8n8k4_ld_c:
20599 // Alternate float MMA loads.
20600 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20601 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20602 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20603 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20604 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20605 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20606 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20607 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20608 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20609 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20610 Value *Src = EmitScalarExpr(E->getArg(1));
20611 Value *Ldm = EmitScalarExpr(E->getArg(2));
20612 std::optional<llvm::APSInt> isColMajorArg =
20613 E->getArg(3)->getIntegerConstantExpr(getContext());
20614 if (!isColMajorArg)
20615 return nullptr;
20616 bool isColMajor = isColMajorArg->getSExtValue();
20617 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20618 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20619 if (IID == 0)
20620 return nullptr;
20621
20622 Value *Result =
20623 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20624
20625 // Save returned values.
20626 assert(II.NumResults);
20627 if (II.NumResults == 1) {
20628 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
20629 CharUnits::fromQuantity(4));
20630 } else {
20631 for (unsigned i = 0; i < II.NumResults; ++i) {
20632 Builder.CreateAlignedStore(
20633 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20634 Dst.getElementType()),
20635 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20636 llvm::ConstantInt::get(IntTy, i)),
20637 CharUnits::fromQuantity(4));
20638 }
20639 }
20640 return Result;
20641 }
20642
20643 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20644 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20645 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20646 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20647 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20648 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20649 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20650 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20651 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20652 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20653 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20654 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20655 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20656 Value *Dst = EmitScalarExpr(E->getArg(0));
20657 Address Src = EmitPointerWithAlignment(E->getArg(1));
20658 Value *Ldm = EmitScalarExpr(E->getArg(2));
20659 std::optional<llvm::APSInt> isColMajorArg =
20660 E->getArg(3)->getIntegerConstantExpr(getContext());
20661 if (!isColMajorArg)
20662 return nullptr;
20663 bool isColMajor = isColMajorArg->getSExtValue();
20664 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20665 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20666 if (IID == 0)
20667 return nullptr;
20668 Function *Intrinsic =
20669 CGM.getIntrinsic(IID, Dst->getType());
20670 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20671 SmallVector<Value *, 10> Values = {Dst};
20672 for (unsigned i = 0; i < II.NumResults; ++i) {
20673 Value *V = Builder.CreateAlignedLoad(
20674 Src.getElementType(),
20675 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
20676 llvm::ConstantInt::get(IntTy, i)),
20677 CharUnits::fromQuantity(4));
20678 Values.push_back(Builder.CreateBitCast(V, ParamType));
20679 }
20680 Values.push_back(Ldm);
20681 Value *Result = Builder.CreateCall(Intrinsic, Values);
20682 return Result;
20683 }
20684
20685 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20686 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
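// For example (rough sketch): __hmma_m16n16k16_mma_f32f16(d, a, b, c,
// /*layout=*/0 /*row-major A and B*/, /*satf=*/0) is expected to select
// Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f32_f16 via getNVPTXMmaInfo.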
20687 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20688 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20689 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20690 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20691 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20692 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20693 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20694 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20695 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20696 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20697 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20698 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20699 case NVPTX::BI__imma_m16n16k16_mma_s8:
20700 case NVPTX::BI__imma_m16n16k16_mma_u8:
20701 case NVPTX::BI__imma_m32n8k16_mma_s8:
20702 case NVPTX::BI__imma_m32n8k16_mma_u8:
20703 case NVPTX::BI__imma_m8n32k16_mma_s8:
20704 case NVPTX::BI__imma_m8n32k16_mma_u8:
20705 case NVPTX::BI__imma_m8n8k32_mma_s4:
20706 case NVPTX::BI__imma_m8n8k32_mma_u4:
20707 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20708 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20709 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20710 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20711 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20712 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20713 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20714 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20715 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20716 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20717 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20718 std::optional<llvm::APSInt> LayoutArg =
20719 E->getArg(4)->getIntegerConstantExpr(getContext());
20720 if (!LayoutArg)
20721 return nullptr;
20722 int Layout = LayoutArg->getSExtValue();
20723 if (Layout < 0 || Layout > 3)
20724 return nullptr;
20725 llvm::APSInt SatfArg;
20726 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20727 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20728 SatfArg = 0; // .b1 does not have satf argument.
20729 else if (std::optional<llvm::APSInt> OptSatfArg =
20730 E->getArg(5)->getIntegerConstantExpr(getContext()))
20731 SatfArg = *OptSatfArg;
20732 else
20733 return nullptr;
20734 bool Satf = SatfArg.getSExtValue();
20735 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20736 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20737 if (IID == 0) // Unsupported combination of Layout/Satf.
20738 return nullptr;
20739
20740 SmallVector<Value *, 24> Values;
20741 Function *Intrinsic = CGM.getIntrinsic(IID);
20742 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20743 // Load A
20744 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20745 Value *V = Builder.CreateAlignedLoad(
20746 SrcA.getElementType(),
20747 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
20748 llvm::ConstantInt::get(IntTy, i)),
20749 CharUnits::fromQuantity(4));
20750 Values.push_back(Builder.CreateBitCast(V, AType));
20751 }
20752 // Load B
20753 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20754 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20755 Value *V = Builder.CreateAlignedLoad(
20756 SrcB.getElementType(),
20757 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
20758 llvm::ConstantInt::get(IntTy, i)),
20759 CharUnits::fromQuantity(4));
20760 Values.push_back(Builder.CreateBitCast(V, BType));
20761 }
20762 // Load C
20763 llvm::Type *CType =
20764 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20765 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20766 Value *V = Builder.CreateAlignedLoad(
20767 SrcC.getElementType(),
20768 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
20769 llvm::ConstantInt::get(IntTy, i)),
20770 CharUnits::fromQuantity(4));
20771 Values.push_back(Builder.CreateBitCast(V, CType));
20772 }
20773 Value *Result = Builder.CreateCall(Intrinsic, Values);
20774 llvm::Type *DType = Dst.getElementType();
20775 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20776 Builder.CreateAlignedStore(
20777 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20778 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20779 llvm::ConstantInt::get(IntTy, i)),
20780 CharUnits::fromQuantity(4));
20781 return Result;
20782 }
20783 // The following builtins require half type support
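// Roughly, MakeHalfType (a static helper defined earlier in this file)
// diagnoses targets without native half support and returns nullptr;
// otherwise it forwards the builtin's arguments to the named intrinsic,
// routing the half ldg/ldu variants through the same ldg/ldu lowering as
// above.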
20784 case NVPTX::BI__nvvm_ex2_approx_f16:
20785 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20786 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20787 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20788 case NVPTX::BI__nvvm_ff2f16x2_rn:
20789 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20790 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20791 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20792 case NVPTX::BI__nvvm_ff2f16x2_rz:
20793 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20794 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20795 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20796 case NVPTX::BI__nvvm_fma_rn_f16:
20797 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20798 case NVPTX::BI__nvvm_fma_rn_f16x2:
20799 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20800 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20801 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20802 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20803 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20804 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20805 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20806 *this);
20807 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20808 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20809 *this);
20810 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20811 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20812 *this);
20813 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20814 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20815 *this);
20816 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20817 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20818 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20819 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20820 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20821 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20822 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20823 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20824 case NVPTX::BI__nvvm_fmax_f16:
20825 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20826 case NVPTX::BI__nvvm_fmax_f16x2:
20827 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20828 case NVPTX::BI__nvvm_fmax_ftz_f16:
20829 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20830 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20831 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20832 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20833 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20834 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20835 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20836 *this);
20837 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20838 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20839 E, *this);
20840 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20841 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20842 BuiltinID, E, *this);
20843 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20844 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20845 *this);
20846 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20847 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20848 E, *this);
20849 case NVPTX::BI__nvvm_fmax_nan_f16:
20850 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20851 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20852 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20853 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20854 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20855 *this);
20856 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20857 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20858 E, *this);
20859 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20860 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20861 *this);
20862 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20863 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20864 *this);
20865 case NVPTX::BI__nvvm_fmin_f16:
20866 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20867 case NVPTX::BI__nvvm_fmin_f16x2:
20868 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20869 case NVPTX::BI__nvvm_fmin_ftz_f16:
20870 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20871 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20872 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20873 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20874 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20875 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20876 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20877 *this);
20878 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20879 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20880 E, *this);
20881 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20882 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20883 BuiltinID, E, *this);
20884 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20885 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20886 *this);
20887 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20888 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20889 E, *this);
20890 case NVPTX::BI__nvvm_fmin_nan_f16:
20891 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20892 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20893 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20894 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20895 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20896 *this);
20897 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20898 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20899 E, *this);
20900 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20901 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20902 *this);
20903 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20904 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20905 *this);
20906 case NVPTX::BI__nvvm_ldg_h:
20907 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20908 case NVPTX::BI__nvvm_ldg_h2:
20909 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20910 case NVPTX::BI__nvvm_ldu_h:
20911 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20912 case NVPTX::BI__nvvm_ldu_h2: {
20913 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20914 }
20915 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20916 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20917 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20918 4);
20919 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20920 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20921 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20922 8);
20923 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20924 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20925 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20926 16);
20927 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20928 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20929 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20930 16);
20931 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20932 return Builder.CreateCall(
20933 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20934 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20935 return Builder.CreateCall(
20936 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20937 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20938 return Builder.CreateCall(
20939 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20940 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20941 return Builder.CreateCall(
20942 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20943 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20944 return Builder.CreateCall(
20945 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20946 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20947 return Builder.CreateCall(
20948 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20949 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20950 return Builder.CreateCall(
20951 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20952 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20953 return Builder.CreateCall(
20954 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20955 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20956 return Builder.CreateCall(
20957 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20958 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20959 return Builder.CreateCall(
20960 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20961 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20962 return Builder.CreateCall(
20963 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20964 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20965 return Builder.CreateCall(
20966 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20967 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20968 return Builder.CreateCall(
20969 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20970 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20971 return Builder.CreateCall(
20972 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20973 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20974 return Builder.CreateCall(
20975 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20976 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20977 return Builder.CreateCall(
20978 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20979 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20980 return Builder.CreateCall(
20981 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20982 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20983 return Builder.CreateCall(
20984 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20985 case NVPTX::BI__nvvm_is_explicit_cluster:
20986 return Builder.CreateCall(
20987 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20988 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20989 return Builder.CreateCall(
20990 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20991 EmitScalarExpr(E->getArg(0)));
20992 case NVPTX::BI__nvvm_mapa:
20993 return Builder.CreateCall(
20994 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20995 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20996 case NVPTX::BI__nvvm_mapa_shared_cluster:
20997 return Builder.CreateCall(
20998 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20999 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21000 case NVPTX::BI__nvvm_getctarank:
21001 return Builder.CreateCall(
21002 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
21003 EmitScalarExpr(E->getArg(0)));
21004 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21005 return Builder.CreateCall(
21006 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
21007 EmitScalarExpr(E->getArg(0)));
21008 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21009 return Builder.CreateCall(
21010 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
21011 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21012 return Builder.CreateCall(
21013 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
21014 case NVPTX::BI__nvvm_barrier_cluster_wait:
21015 return Builder.CreateCall(
21016 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
21017 case NVPTX::BI__nvvm_fence_sc_cluster:
21018 return Builder.CreateCall(
21019 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
21020 default:
21021 return nullptr;
21022 }
21023}
21024
21025namespace {
21026struct BuiltinAlignArgs {
21027 llvm::Value *Src = nullptr;
21028 llvm::Type *SrcType = nullptr;
21029 llvm::Value *Alignment = nullptr;
21030 llvm::Value *Mask = nullptr;
21031 llvm::IntegerType *IntType = nullptr;
21032
21033 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
21034 QualType AstType = E->getArg(0)->getType();
21035 if (AstType->isArrayType())
21036 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
21037 else
21038 Src = CGF.EmitScalarExpr(E->getArg(0));
21039 SrcType = Src->getType();
21040 if (SrcType->isPointerTy()) {
21041 IntType = IntegerType::get(
21042 CGF.getLLVMContext(),
21043 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
21044 } else {
21045 assert(SrcType->isIntegerTy());
21046 IntType = cast<llvm::IntegerType>(SrcType);
21047 }
21048 Alignment = CGF.EmitScalarExpr(E->getArg(1));
21049 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
21050 auto *One = llvm::ConstantInt::get(IntType, 1);
21051 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
21052 }
21053};
21054} // namespace
21055
21056/// Generate (x & (y-1)) == 0.
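/// A rough sketch of the output (illustrative): __builtin_is_aligned(p, 16)
/// becomes
///   %src_addr   = ptrtoint ptr %p to i64
///   %set_bits   = and i64 %src_addr, 15   ; mask = alignment - 1
///   %is_aligned = icmp eq i64 %set_bits, 0
/// with the integer width taken from the pointer's index type.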
21057RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
21058 BuiltinAlignArgs Args(E, *this);
21059 llvm::Value *SrcAddress = Args.Src;
21060 if (Args.SrcType->isPointerTy())
21061 SrcAddress =
21062 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
21063 return RValue::get(Builder.CreateICmpEQ(
21064 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
21065 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
21066}
21067
21068/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
21069/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
21070/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
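/// A rough sketch of the pointer path (illustrative): __builtin_align_up(p, 32)
/// becomes
///   %over_boundary  = getelementptr inbounds i8, ptr %p, i64 31
///   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary, i64 -32)
/// while the integer path uses plain add and and instructions instead.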
21071RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
21072 BuiltinAlignArgs Args(E, *this);
21073 llvm::Value *SrcForMask = Args.Src;
21074 if (AlignUp) {
21075 // When aligning up we have to first add the mask to ensure we go over the
21076 // next alignment value and then align down to the next valid multiple.
21077 // By adding the mask, we ensure that align_up on an already aligned
21078 // value will not change the value.
21079 if (Args.Src->getType()->isPointerTy()) {
21080 if (getLangOpts().isSignedOverflowDefined())
21081 SrcForMask =
21082 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
21083 else
21084 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
21085 /*SignedIndices=*/true,
21086 /*isSubtraction=*/false,
21087 E->getExprLoc(), "over_boundary");
21088 } else {
21089 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
21090 }
21091 }
21092 // Invert the mask to only clear the lower bits.
21093 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
21094 llvm::Value *Result = nullptr;
21095 if (Args.Src->getType()->isPointerTy()) {
21096 Result = Builder.CreateIntrinsic(
21097 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
21098 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
21099 } else {
21100 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
21101 }
21102 assert(Result->getType() == Args.SrcType);
21103 return RValue::get(Result);
21104}
21105
21106Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
21107 const CallExpr *E) {
21108 switch (BuiltinID) {
21109 case WebAssembly::BI__builtin_wasm_memory_size: {
21110 llvm::Type *ResultType = ConvertType(E->getType());
21111 Value *I = EmitScalarExpr(E->getArg(0));
21112 Function *Callee =
21113 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
21114 return Builder.CreateCall(Callee, I);
21115 }
21116 case WebAssembly::BI__builtin_wasm_memory_grow: {
21117 llvm::Type *ResultType = ConvertType(E->getType());
21118 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
21119 EmitScalarExpr(E->getArg(1))};
21120 Function *Callee =
21121 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
21122 return Builder.CreateCall(Callee, Args);
21123 }
21124 case WebAssembly::BI__builtin_wasm_tls_size: {
21125 llvm::Type *ResultType = ConvertType(E->getType());
21126 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
21127 return Builder.CreateCall(Callee);
21128 }
21129 case WebAssembly::BI__builtin_wasm_tls_align: {
21130 llvm::Type *ResultType = ConvertType(E->getType());
21131 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
21132 return Builder.CreateCall(Callee);
21133 }
21134 case WebAssembly::BI__builtin_wasm_tls_base: {
21135 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
21136 return Builder.CreateCall(Callee);
21137 }
21138 case WebAssembly::BI__builtin_wasm_throw: {
21139 Value *Tag = EmitScalarExpr(E->getArg(0));
21140 Value *Obj = EmitScalarExpr(E->getArg(1));
21141 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
21142 return Builder.CreateCall(Callee, {Tag, Obj});
21143 }
21144 case WebAssembly::BI__builtin_wasm_rethrow: {
21145 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
21146 return Builder.CreateCall(Callee);
21147 }
21148 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
21149 Value *Addr = EmitScalarExpr(E->getArg(0));
21150 Value *Expected = EmitScalarExpr(E->getArg(1));
21151 Value *Timeout = EmitScalarExpr(E->getArg(2));
21152 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
21153 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
21154 }
21155 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
21156 Value *Addr = EmitScalarExpr(E->getArg(0));
21157 Value *Expected = EmitScalarExpr(E->getArg(1));
21158 Value *Timeout = EmitScalarExpr(E->getArg(2));
21159 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
21160 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
21161 }
21162 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
21163 Value *Addr = EmitScalarExpr(E->getArg(0));
21164 Value *Count = EmitScalarExpr(E->getArg(1));
21165 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
21166 return Builder.CreateCall(Callee, {Addr, Count});
21167 }
21168 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
21169 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
21170 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
21171 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
21172 Value *Src = EmitScalarExpr(E->getArg(0));
21173 llvm::Type *ResT = ConvertType(E->getType());
21174 Function *Callee =
21175 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
21176 return Builder.CreateCall(Callee, {Src});
21177 }
21178 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
21179 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
21180 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
21181 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
21182 Value *Src = EmitScalarExpr(E->getArg(0));
21183 llvm::Type *ResT = ConvertType(E->getType());
21184 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
21185 {ResT, Src->getType()});
21186 return Builder.CreateCall(Callee, {Src});
21187 }
21188 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
21189 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
21190 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
21191 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
21192 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
21193 Value *Src = EmitScalarExpr(E->getArg(0));
21194 llvm::Type *ResT = ConvertType(E->getType());
21195 Function *Callee =
21196 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
21197 return Builder.CreateCall(Callee, {Src});
21198 }
21199 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
21200 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
21201 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
21202 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
21203 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
21204 Value *Src = EmitScalarExpr(E->getArg(0));
21205 llvm::Type *ResT = ConvertType(E->getType());
21206 Function *Callee =
21207 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
21208 return Builder.CreateCall(Callee, {Src});
21209 }
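// The float min/max builtins below map to the generic llvm.minimum and
// llvm.maximum intrinsics, which propagate NaNs as WebAssembly's min/max
// instructions require.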
21210 case WebAssembly::BI__builtin_wasm_min_f32:
21211 case WebAssembly::BI__builtin_wasm_min_f64:
21212 case WebAssembly::BI__builtin_wasm_min_f16x8:
21213 case WebAssembly::BI__builtin_wasm_min_f32x4:
21214 case WebAssembly::BI__builtin_wasm_min_f64x2: {
21215 Value *LHS = EmitScalarExpr(E->getArg(0));
21216 Value *RHS = EmitScalarExpr(E->getArg(1));
21217 Function *Callee =
21218 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
21219 return Builder.CreateCall(Callee, {LHS, RHS});
21220 }
21221 case WebAssembly::BI__builtin_wasm_max_f32:
21222 case WebAssembly::BI__builtin_wasm_max_f64:
21223 case WebAssembly::BI__builtin_wasm_max_f16x8:
21224 case WebAssembly::BI__builtin_wasm_max_f32x4:
21225 case WebAssembly::BI__builtin_wasm_max_f64x2: {
21226 Value *LHS = EmitScalarExpr(E->getArg(0));
21227 Value *RHS = EmitScalarExpr(E->getArg(1));
21228 Function *Callee =
21229 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
21230 return Builder.CreateCall(Callee, {LHS, RHS});
21231 }
21232 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
21233 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
21234 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
21235 Value *LHS = EmitScalarExpr(E->getArg(0));
21236 Value *RHS = EmitScalarExpr(E->getArg(1));
21237 Function *Callee =
21238 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
21239 return Builder.CreateCall(Callee, {LHS, RHS});
21240 }
21241 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
21242 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
21243 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
21244 Value *LHS = EmitScalarExpr(E->getArg(0));
21245 Value *RHS = EmitScalarExpr(E->getArg(1));
21246 Function *Callee =
21247 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
21248 return Builder.CreateCall(Callee, {LHS, RHS});
21249 }
21250 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21251 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21252 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21253 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21254 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21255 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21256 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21257 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
21258 unsigned IntNo;
21259 switch (BuiltinID) {
21260 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21261 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21262 IntNo = Intrinsic::ceil;
21263 break;
21264 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21265 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21266 IntNo = Intrinsic::floor;
21267 break;
21268 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21269 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21270 IntNo = Intrinsic::trunc;
21271 break;
21272 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21273 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
21274 IntNo = Intrinsic::nearbyint;
21275 break;
21276 default:
21277 llvm_unreachable("unexpected builtin ID");
21278 }
21279 Value *Value = EmitScalarExpr(E->getArg(0));
21280 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21281 return Builder.CreateCall(Callee, Value);
21282 }
21283 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
21284 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
21285 return Builder.CreateCall(Callee);
21286 }
21287 case WebAssembly::BI__builtin_wasm_ref_null_func: {
21288 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
21289 return Builder.CreateCall(Callee);
21290 }
21291 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
21292 Value *Src = EmitScalarExpr(E->getArg(0));
21293 Value *Indices = EmitScalarExpr(E->getArg(1));
21294 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
21295 return Builder.CreateCall(Callee, {Src, Indices});
21296 }
21297 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21298 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21299 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21300 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21301 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21302 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21303 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21304 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
21305 unsigned IntNo;
21306 switch (BuiltinID) {
21307 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21308 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21309 IntNo = Intrinsic::sadd_sat;
21310 break;
21311 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21312 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21313 IntNo = Intrinsic::uadd_sat;
21314 break;
21315 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21316 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21317 IntNo = Intrinsic::wasm_sub_sat_signed;
21318 break;
21319 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21320 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
21321 IntNo = Intrinsic::wasm_sub_sat_unsigned;
21322 break;
21323 default:
21324 llvm_unreachable("unexpected builtin ID");
21325 }
21326 Value *LHS = EmitScalarExpr(E->getArg(0));
21327 Value *RHS = EmitScalarExpr(E->getArg(1));
21328 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21329 return Builder.CreateCall(Callee, {LHS, RHS});
21330 }
21331 case WebAssembly::BI__builtin_wasm_abs_i8x16:
21332 case WebAssembly::BI__builtin_wasm_abs_i16x8:
21333 case WebAssembly::BI__builtin_wasm_abs_i32x4:
21334 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
21335 Value *Vec = EmitScalarExpr(E->getArg(0));
21336 Value *Neg = Builder.CreateNeg(Vec, "neg");
21337 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
21338 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
21339 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
21340 }
21341 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21342 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21343 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21344 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21345 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21346 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21347 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21348 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21349 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21350 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21351 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21352 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
21353 Value *LHS = EmitScalarExpr(E->getArg(0));
21354 Value *RHS = EmitScalarExpr(E->getArg(1));
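// There is no dedicated WebAssembly intrinsic in this lowering; integer
// vector min/max are open-coded as an integer compare followed by a select.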
21355 Value *ICmp;
21356 switch (BuiltinID) {
21357 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21358 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21359 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21360 ICmp = Builder.CreateICmpSLT(LHS, RHS);
21361 break;
21362 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21363 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21364 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21365 ICmp = Builder.CreateICmpULT(LHS, RHS);
21366 break;
21367 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21368 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21369 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21370 ICmp = Builder.CreateICmpSGT(LHS, RHS);
21371 break;
21372 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21373 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21374 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
21375 ICmp = Builder.CreateICmpUGT(LHS, RHS);
21376 break;
21377 default:
21378 llvm_unreachable("unexpected builtin ID");
21379 }
21380 return Builder.CreateSelect(ICmp, LHS, RHS);
21381 }
21382 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
21383 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
21384 Value *LHS = EmitScalarExpr(E->getArg(0));
21385 Value *RHS = EmitScalarExpr(E->getArg(1));
21386 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
21387 ConvertType(E->getType()));
21388 return Builder.CreateCall(Callee, {LHS, RHS});
21389 }
21390 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
21391 Value *LHS = EmitScalarExpr(E->getArg(0));
21392 Value *RHS = EmitScalarExpr(E->getArg(1));
21393 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
21394 return Builder.CreateCall(Callee, {LHS, RHS});
21395 }
21396 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21397 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21398 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21399 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
21400 Value *Vec = EmitScalarExpr(E->getArg(0));
21401 unsigned IntNo;
21402 switch (BuiltinID) {
21403 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21404 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21405 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
21406 break;
21407 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21408 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
21409 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
21410 break;
21411 default:
21412 llvm_unreachable("unexpected builtin ID");
21413 }
21414
21415 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21416 return Builder.CreateCall(Callee, Vec);
21417 }
21418 case WebAssembly::BI__builtin_wasm_bitselect: {
21419 Value *V1 = EmitScalarExpr(E->getArg(0));
21420 Value *V2 = EmitScalarExpr(E->getArg(1));
21421 Value *C = EmitScalarExpr(E->getArg(2));
21422 Function *Callee =
21423 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
21424 return Builder.CreateCall(Callee, {V1, V2, C});
21425 }
21426 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
21427 Value *LHS = EmitScalarExpr(E->getArg(0));
21428 Value *RHS = EmitScalarExpr(E->getArg(1));
21429 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
21430 return Builder.CreateCall(Callee, {LHS, RHS});
21431 }
21432 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
21433 Value *Vec = EmitScalarExpr(E->getArg(0));
21434 Function *Callee =
21435 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21436 return Builder.CreateCall(Callee, {Vec});
21437 }
21438 case WebAssembly::BI__builtin_wasm_any_true_v128:
21439 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21440 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21441 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21442 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21443 unsigned IntNo;
21444 switch (BuiltinID) {
21445 case WebAssembly::BI__builtin_wasm_any_true_v128:
21446 IntNo = Intrinsic::wasm_anytrue;
21447 break;
21448 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21449 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21450 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21451 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21452 IntNo = Intrinsic::wasm_alltrue;
21453 break;
21454 default:
21455 llvm_unreachable("unexpected builtin ID");
21456 }
21457 Value *Vec = EmitScalarExpr(E->getArg(0));
21458 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
21459 return Builder.CreateCall(Callee, {Vec});
21460 }
21461 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21462 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21463 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21464 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21465 Value *Vec = EmitScalarExpr(E->getArg(0));
21466 Function *Callee =
21467 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21468 return Builder.CreateCall(Callee, {Vec});
21469 }
21470 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21471 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21472 Value *Vec = EmitScalarExpr(E->getArg(0));
21473 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21474 return Builder.CreateCall(Callee, {Vec});
21475 }
21476 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21477 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21478 Value *Vec = EmitScalarExpr(E->getArg(0));
21479 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21480 return Builder.CreateCall(Callee, {Vec});
21481 }
21482 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21483 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21484 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21485 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21486 Value *Low = EmitScalarExpr(E->getArg(0));
21487 Value *High = EmitScalarExpr(E->getArg(1));
21488 unsigned IntNo;
21489 switch (BuiltinID) {
21490 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21491 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21492 IntNo = Intrinsic::wasm_narrow_signed;
21493 break;
21494 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21495 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21496 IntNo = Intrinsic::wasm_narrow_unsigned;
21497 break;
21498 default:
21499 llvm_unreachable("unexpected builtin ID");
21500 }
21501 Function *Callee =
21502 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
21503 return Builder.CreateCall(Callee, {Low, High});
21504 }
21505 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21506 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21507 Value *Vec = EmitScalarExpr(E->getArg(0));
21508 unsigned IntNo;
21509 switch (BuiltinID) {
21510 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21511 IntNo = Intrinsic::fptosi_sat;
21512 break;
21513 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21514 IntNo = Intrinsic::fptoui_sat;
21515 break;
21516 default:
21517 llvm_unreachable("unexpected builtin ID");
21518 }
21519 llvm::Type *SrcT = Vec->getType();
21520 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
21521 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
21522 Value *Trunc = Builder.CreateCall(Callee, Vec);
21523 Value *Splat = Constant::getNullValue(TruncT);
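// The saturating conversion of f64x2 produces only two i32 lanes; shuffling
// with a zero vector pads lanes 2 and 3 with zeroes to form the i32x4 result.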
21524 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
21525 }
21526 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
21527 Value *Ops[18];
21528 size_t OpIdx = 0;
21529 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
21530 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
21531 while (OpIdx < 18) {
21532 std::optional<llvm::APSInt> LaneConst =
21533 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
21534 assert(LaneConst && "Constant arg isn't actually constant?");
21535 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
21536 }
21537 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21538 return Builder.CreateCall(Callee, Ops);
21539 }
21540 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21541 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21542 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21543 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21544 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21545 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21546 Value *A = EmitScalarExpr(E->getArg(0));
21547 Value *B = EmitScalarExpr(E->getArg(1));
21548 Value *C = EmitScalarExpr(E->getArg(2));
21549 unsigned IntNo;
21550 switch (BuiltinID) {
21551 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21552 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21553 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21554 IntNo = Intrinsic::wasm_relaxed_madd;
21555 break;
21556 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21557 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21558 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21559 IntNo = Intrinsic::wasm_relaxed_nmadd;
21560 break;
21561 default:
21562 llvm_unreachable("unexpected builtin ID");
21563 }
21564 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
21565 return Builder.CreateCall(Callee, {A, B, C});
21566 }
21567 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21568 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21569 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21570 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21571 Value *A = EmitScalarExpr(E->getArg(0));
21572 Value *B = EmitScalarExpr(E->getArg(1));
21573 Value *C = EmitScalarExpr(E->getArg(2));
21574 Function *Callee =
21575 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21576 return Builder.CreateCall(Callee, {A, B, C});
21577 }
21578 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21579 Value *Src = EmitScalarExpr(E->getArg(0));
21580 Value *Indices = EmitScalarExpr(E->getArg(1));
21581 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21582 return Builder.CreateCall(Callee, {Src, Indices});
21583 }
21584 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21585 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21586 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21587 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21588 Value *LHS = EmitScalarExpr(E->getArg(0));
21589 Value *RHS = EmitScalarExpr(E->getArg(1));
21590 unsigned IntNo;
21591 switch (BuiltinID) {
21592 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21593 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21594 IntNo = Intrinsic::wasm_relaxed_min;
21595 break;
21596 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21597 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21598 IntNo = Intrinsic::wasm_relaxed_max;
21599 break;
21600 default:
21601 llvm_unreachable("unexpected builtin ID");
21602 }
21603 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21604 return Builder.CreateCall(Callee, {LHS, RHS});
21605 }
21606 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21607 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21608 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21609 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21610 Value *Vec = EmitScalarExpr(E->getArg(0));
21611 unsigned IntNo;
21612 switch (BuiltinID) {
21613 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21614 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21615 break;
21616 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21617 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21618 break;
21619 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21620 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21621 break;
21622 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21623 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21624 break;
21625 default:
21626 llvm_unreachable("unexpected builtin ID");
21627 }
21628 Function *Callee = CGM.getIntrinsic(IntNo);
21629 return Builder.CreateCall(Callee, {Vec});
21630 }
21631 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21632 Value *LHS = EmitScalarExpr(E->getArg(0));
21633 Value *RHS = EmitScalarExpr(E->getArg(1));
21634 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21635 return Builder.CreateCall(Callee, {LHS, RHS});
21636 }
21637 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21638 Value *LHS = EmitScalarExpr(E->getArg(0));
21639 Value *RHS = EmitScalarExpr(E->getArg(1));
21640 Function *Callee =
21641 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21642 return Builder.CreateCall(Callee, {LHS, RHS});
21643 }
21644 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21645 Value *LHS = EmitScalarExpr(E->getArg(0));
21646 Value *RHS = EmitScalarExpr(E->getArg(1));
21647 Value *Acc = EmitScalarExpr(E->getArg(2));
21648 Function *Callee =
21649 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21650 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21651 }
21652 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21653 Value *LHS = EmitScalarExpr(E->getArg(0));
21654 Value *RHS = EmitScalarExpr(E->getArg(1));
21655 Value *Acc = EmitScalarExpr(E->getArg(2));
21656 Function *Callee =
21657 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21658 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21659 }
21660 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
21661 Value *Addr = EmitScalarExpr(E->getArg(0));
21662 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
21663 return Builder.CreateCall(Callee, {Addr});
21664 }
21665 case WebAssembly::BI__builtin_wasm_storef16_f32: {
21666 Value *Val = EmitScalarExpr(E->getArg(0));
21667 Value *Addr = EmitScalarExpr(E->getArg(1));
21668 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
21669 return Builder.CreateCall(Callee, {Val, Addr});
21670 }
21671 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
21672 Value *Val = EmitScalarExpr(E->getArg(0));
21673 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
21674 return Builder.CreateCall(Callee, {Val});
21675 }
21676 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
21677 Value *Vector = EmitScalarExpr(E->getArg(0));
21678 Value *Index = EmitScalarExpr(E->getArg(1));
21679 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
21680 return Builder.CreateCall(Callee, {Vector, Index});
21681 }
21682 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
21683 Value *Vector = EmitScalarExpr(E->getArg(0));
21684 Value *Index = EmitScalarExpr(E->getArg(1));
21685 Value *Val = EmitScalarExpr(E->getArg(2));
21686 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
21687 return Builder.CreateCall(Callee, {Vector, Index, Val});
21688 }
21689 case WebAssembly::BI__builtin_wasm_table_get: {
21690 assert(E->getArg(0)->getType()->isArrayType());
21691 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21692 Value *Index = EmitScalarExpr(E->getArg(1));
21693 Function *Callee;
21694 if (E->getType().isWebAssemblyExternrefType())
21695 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21696 else if (E->getType().isWebAssemblyFuncrefType())
21697 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21698 else
21699 llvm_unreachable(
21700 "Unexpected reference type for __builtin_wasm_table_get");
21701 return Builder.CreateCall(Callee, {Table, Index});
21702 }
21703 case WebAssembly::BI__builtin_wasm_table_set: {
21704 assert(E->getArg(0)->getType()->isArrayType());
21705 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21706 Value *Index = EmitScalarExpr(E->getArg(1));
21707 Value *Val = EmitScalarExpr(E->getArg(2));
21708 Function *Callee;
21709 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21710 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21711 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21712 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21713 else
21714 llvm_unreachable(
21715 "Unexpected reference type for __builtin_wasm_table_set");
21716 return Builder.CreateCall(Callee, {Table, Index, Val});
21717 }
21718 case WebAssembly::BI__builtin_wasm_table_size: {
21719 assert(E->getArg(0)->getType()->isArrayType());
21720 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21721 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21722 return Builder.CreateCall(Callee, Value);
21723 }
21724 case WebAssembly::BI__builtin_wasm_table_grow: {
21725 assert(E->getArg(0)->getType()->isArrayType());
21726 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21727 Value *Val = EmitScalarExpr(E->getArg(1));
21728 Value *NElems = EmitScalarExpr(E->getArg(2));
21729
21730 Function *Callee;
21731 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21732 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21733 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21734 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21735 else
21736 llvm_unreachable(
21737 "Unexpected reference type for __builtin_wasm_table_grow");
21738
21739 return Builder.CreateCall(Callee, {Table, Val, NElems});
21740 }
21741 case WebAssembly::BI__builtin_wasm_table_fill: {
21742 assert(E->getArg(0)->getType()->isArrayType());
21743 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21744 Value *Index = EmitScalarExpr(E->getArg(1));
21745 Value *Val = EmitScalarExpr(E->getArg(2));
21746 Value *NElems = EmitScalarExpr(E->getArg(3));
21747
21748 Function *Callee;
21749 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21750 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21751 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21752 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21753 else
21754 llvm_unreachable(
21755 "Unexpected reference type for __builtin_wasm_table_fill");
21756
21757 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21758 }
21759 case WebAssembly::BI__builtin_wasm_table_copy: {
21760 assert(E->getArg(0)->getType()->isArrayType());
21761 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21762 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
21763 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21764 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21765 Value *NElems = EmitScalarExpr(E->getArg(4));
21766
21767 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21768
21769 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21770 }
21771 default:
21772 return nullptr;
21773 }
21774}
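// Editorial note (not in the upstream source): all of the table builtins
// above take a zero-sized array of a WebAssembly reference type as their
// first argument, which is why each case asserts isArrayType() and decays it
// to a raw pointer, and the element type (externref vs. funcref) selects the
// *_externref or *_funcref intrinsic variant. A minimal, illustrative usage
// sketch (declarations and exact types assumed, not taken from this file):
//
//   static __externref_t table[0];
//   int n = __builtin_wasm_table_size(table);          // @llvm.wasm.table.size
//   __builtin_wasm_table_fill(table, idx, ref, count); // @llvm.wasm.table.fill.externref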
21775
21776static std::pair<Intrinsic::ID, unsigned>
21777 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21778 struct Info {
21779 unsigned BuiltinID;
21780 Intrinsic::ID IntrinsicID;
21781 unsigned VecLen;
21782 };
21783 static Info Infos[] = {
21784#define CUSTOM_BUILTIN_MAPPING(x,s) \
21785 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21786 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21787 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21788 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21789 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21790 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21791 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21792 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21793 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21794 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21795 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21796 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21797 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21798 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21799 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21800 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21801 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21802 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21803 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21804 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21805 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21806 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21807 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21808 // Legacy builtins that take a vector in place of a vector predicate.
21809 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21810 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21811 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21812 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21813 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21814 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21815 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21816 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21817#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21818#undef CUSTOM_BUILTIN_MAPPING
21819 };
21820
21821 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21822 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21823 (void)SortOnce;
21824
21825 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21826 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21827 return {Intrinsic::not_intrinsic, 0};
21828
21829 return {F->IntrinsicID, F->VecLen};
21830}
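// Editorial note (not in the upstream source): the Infos table above is
// sorted exactly once via the SortOnce static local and then searched with
// llvm::lower_bound, so lookups are O(log n). For example (illustrative
// only):
//
//   auto [ID, VecLen] = getIntrinsicForHexagonNonClangBuiltin(
//       Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B);
//   // ID == Intrinsic::hexagon_V6_vmaskedstoreq_128B, VecLen == 128
//
// Builtins that are not listed come back as {Intrinsic::not_intrinsic, 0} and
// fall through to the generic handling in the caller.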
21831
21832 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21833 const CallExpr *E) {
21834 Intrinsic::ID ID;
21835 unsigned VecLen;
21836 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21837
21838 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21839 // The base pointer is passed by address, so it needs to be loaded.
21840 Address A = EmitPointerWithAlignment(E->getArg(0));
21841 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
21842 llvm::Value *Base = Builder.CreateLoad(BP);
21843 // The treatment of both loads and stores is the same: the arguments for
21844 // the builtin are the same as the arguments for the intrinsic.
21845 // Load:
21846 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21847 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21848 // Store:
21849 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21850 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21851 llvm::SmallVector<llvm::Value *, 5> Ops = {Base};
21852 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21853 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21854
21855 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21856 // The load intrinsics generate two results (Value, NewBase), stores
21857 // generate one (NewBase). The new base address needs to be stored.
21858 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21859 : Result;
21860 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21861 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21862 llvm::Value *RetVal =
21863 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21864 if (IsLoad)
21865 RetVal = Builder.CreateExtractValue(Result, 0);
21866 return RetVal;
21867 };
21868
21869 // Handle the conversion of bit-reverse load intrinsics to bitcode.
21870 // The intrinsic call emitted below only reads from memory; the
21871 // write to memory is handled by the store instruction.
21872 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21873 // The intrinsic generates one result, which is the new value for the base
21874 // pointer. It needs to be returned. The result of the load instruction is
21875 // passed to the intrinsic by address, so the value needs to be stored.
21876 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21877
21878 // Expressions like &(*pt++) increment the pointer on every evaluation;
21879 // EmitPointerWithAlignment and EmitScalarExpr each evaluate the expression
21880 // exactly once per call.
21881 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21882 DestAddr = DestAddr.withElementType(Int8Ty);
21883 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
21884
21885 // Operands are Base, Dest, Modifier.
21886 // The intrinsic format in LLVM IR is defined as
21887 // { ValueType, i8* } (i8*, i32).
21888 llvm::Value *Result = Builder.CreateCall(
21889 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21890
21891 // The value needs to be stored as the variable is passed by reference.
21892 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21893
21894 // The store needs to be truncated to fit the destination type.
21895 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21896 // to be handled with stores of the respective destination type.
21897 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21898
21899 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21900 // The updated value of the base pointer is returned.
21901 return Builder.CreateExtractValue(Result, 1);
21902 };
21903
21904 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21905 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21906 : Intrinsic::hexagon_V6_vandvrt;
21907 return Builder.CreateCall(CGM.getIntrinsic(ID),
21908 {Vec, Builder.getInt32(-1)});
21909 };
21910 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21911 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21912 : Intrinsic::hexagon_V6_vandqrt;
21913 return Builder.CreateCall(CGM.getIntrinsic(ID),
21914 {Pred, Builder.getInt32(-1)});
21915 };
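// Editorial note (not in the upstream source): V2Q and Q2V convert between an
// HVX vector and a vector predicate by and-ing with an all-ones scalar.
// Roughly, V2Q(Vec) emits
//
//   call <...> @llvm.hexagon.V6.vandvrt[.128B](<...> Vec, i32 -1)
//
// and Q2V(Pred) is the inverse via @llvm.hexagon.V6.vandqrt[.128B]; VecLen
// (64 or 128 bytes) selects between the plain and the _128B intrinsic.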
21916
21917 switch (BuiltinID) {
21918 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21919 // and the corresponding C/C++ builtins use loads/stores to update
21920 // the predicate.
21921 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21922 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21923 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21924 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21925 // Get the type from the 0-th argument.
21926 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21927 Address PredAddr =
21928 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21929 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21930 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21931 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21932
21933 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21934 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21935 PredAddr.getAlignment());
21936 return Builder.CreateExtractValue(Result, 0);
21937 }
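// Editorial note (not in the upstream source): at the source level these
// carry builtins take the predicate by pointer, roughly (types assumed, not
// taken from this file):
//
//   HVX_Vector Sum = __builtin_HEXAGON_V6_vaddcarry_128B(A, B, &Pred);
//
// The code above loads *Pred, converts it with V2Q, passes it as the third
// intrinsic operand, and stores the carry-out back through the same pointer
// via Q2V before returning the vector result.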
21938 // These are identical to the builtins above, except they don't consume
21939 // input carry, only generate carry-out. Since they still produce two
21940 // outputs, generate the store of the predicate, but no load.
21941 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21942 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21943 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21944 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21945 // Get the type from the 0-th argument.
21946 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21947 Address PredAddr =
21948 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21949 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21950 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21951
21952 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21953 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21954 PredAddr.getAlignment());
21955 return Builder.CreateExtractValue(Result, 0);
21956 }
21957
21958 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21959 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21960 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21961 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21962 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21963 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21964 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21965 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21966 SmallVector<llvm::Value *, 4> Ops;
21967 const Expr *PredOp = E->getArg(0);
21968 // There will be an implicit cast to a boolean vector. Strip it.
21969 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21970 if (Cast->getCastKind() == CK_BitCast)
21971 PredOp = Cast->getSubExpr();
21972 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21973 }
21974 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21975 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21976 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
21977 }
21978
21979 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21980 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21981 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21982 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21983 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21984 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21985 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21986 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21987 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21988 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21989 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21990 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21991 return MakeCircOp(ID, /*IsLoad=*/true);
21992 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21993 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21994 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21995 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21996 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21997 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21998 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21999 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22000 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22001 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22002 return MakeCircOp(ID, /*IsLoad=*/false);
22003 case Hexagon::BI__builtin_brev_ldub:
22004 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22005 case Hexagon::BI__builtin_brev_ldb:
22006 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22007 case Hexagon::BI__builtin_brev_lduh:
22008 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22009 case Hexagon::BI__builtin_brev_ldh:
22010 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22011 case Hexagon::BI__builtin_brev_ldw:
22012 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22013 case Hexagon::BI__builtin_brev_ldd:
22014 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22015 } // switch
22016
22017 return nullptr;
22018}
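// Editorial note (not in the upstream source): MakeCircOp and MakeBrevLd
// follow the same pattern: the Hexagon intrinsic returns the post-incremented
// base pointer (for loads, together with the loaded value), and the builtin
// writes that new base back through its pointer argument so the caller's
// cursor advances. A rough sketch of the IR emitted for a bit-reversed byte
// load (types abbreviated, illustrative only):
//
//   %r    = call { i32, ptr } @llvm.hexagon.L2.loadrub.pbr(ptr %base, i32 %mod)
//   %val  = extractvalue { i32, ptr } %r, 0   ; truncated and stored to *dest
//   %next = extractvalue { i32, ptr } %r, 1   ; returned as the updated base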
22019
22020 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
22021 const CallExpr *E,
22022 ReturnValueSlot ReturnValue) {
22023
22024 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
22025 return EmitRISCVCpuSupports(E);
22026 if (BuiltinID == Builtin::BI__builtin_cpu_init)
22027 return EmitRISCVCpuInit();
22028
22029 SmallVector<Value *, 4> Ops;
22030 llvm::Type *ResultType = ConvertType(E->getType());
22031
22032 // Find out if any arguments are required to be integer constant expressions.
22033 unsigned ICEArguments = 0;
22034 ASTContext::GetBuiltinTypeError Error;
22035 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
22036 if (Error == ASTContext::GE_Missing_type) {
22037 // Vector intrinsics don't have a type string.
22038 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
22039 BuiltinID <= clang::RISCV::LastRVVBuiltin);
22040 ICEArguments = 0;
22041 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
22042 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
22043 ICEArguments = 1 << 1;
22044 } else {
22045 assert(Error == ASTContext::GE_None && "Unexpected error");
22046 }
22047
22048 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
22049 ICEArguments |= (1 << 1);
22050 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
22051 ICEArguments |= (1 << 2);
22052
22053 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
22054 // Handle aggregate argument, namely RVV tuple types in segment load/store
22055 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
22056 LValue L = EmitAggExprToLValue(E->getArg(i));
22057 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
22058 Ops.push_back(AggValue);
22059 continue;
22060 }
22061 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
22062 }
22063
22064 Intrinsic::ID ID = Intrinsic::not_intrinsic;
22065 unsigned NF = 1;
22066 // The 0th bit simulates the `vta` of RVV
22067 // The 1st bit simulates the `vma` of RVV
22068 constexpr unsigned RVV_VTA = 0x1;
22069 constexpr unsigned RVV_VMA = 0x2;
22070 int PolicyAttrs = 0;
22071 bool IsMasked = false;
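// Editorial note (not in the upstream source): PolicyAttrs is a small bitmask
// consumed by the generated RVV codegen fragments included below; for
// example, PolicyAttrs == (RVV_VTA | RVV_VMA) requests tail-agnostic,
// mask-agnostic lowering, while 0 keeps both tail and mask undisturbed.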
22072
22073 // Required for overloaded intrinsics.
22074 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
22075 switch (BuiltinID) {
22076 default: llvm_unreachable("unexpected builtin ID");
22077 case RISCV::BI__builtin_riscv_orc_b_32:
22078 case RISCV::BI__builtin_riscv_orc_b_64:
22079 case RISCV::BI__builtin_riscv_clz_32:
22080 case RISCV::BI__builtin_riscv_clz_64:
22081 case RISCV::BI__builtin_riscv_ctz_32:
22082 case RISCV::BI__builtin_riscv_ctz_64:
22083 case RISCV::BI__builtin_riscv_clmul_32:
22084 case RISCV::BI__builtin_riscv_clmul_64:
22085 case RISCV::BI__builtin_riscv_clmulh_32:
22086 case RISCV::BI__builtin_riscv_clmulh_64:
22087 case RISCV::BI__builtin_riscv_clmulr_32:
22088 case RISCV::BI__builtin_riscv_clmulr_64:
22089 case RISCV::BI__builtin_riscv_xperm4_32:
22090 case RISCV::BI__builtin_riscv_xperm4_64:
22091 case RISCV::BI__builtin_riscv_xperm8_32:
22092 case RISCV::BI__builtin_riscv_xperm8_64:
22093 case RISCV::BI__builtin_riscv_brev8_32:
22094 case RISCV::BI__builtin_riscv_brev8_64:
22095 case RISCV::BI__builtin_riscv_zip_32:
22096 case RISCV::BI__builtin_riscv_unzip_32: {
22097 switch (BuiltinID) {
22098 default: llvm_unreachable("unexpected builtin ID");
22099 // Zbb
22100 case RISCV::BI__builtin_riscv_orc_b_32:
22101 case RISCV::BI__builtin_riscv_orc_b_64:
22102 ID = Intrinsic::riscv_orc_b;
22103 break;
22104 case RISCV::BI__builtin_riscv_clz_32:
22105 case RISCV::BI__builtin_riscv_clz_64: {
22106 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
22107 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
22108 if (Result->getType() != ResultType)
22109 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
22110 "cast");
22111 return Result;
22112 }
22113 case RISCV::BI__builtin_riscv_ctz_32:
22114 case RISCV::BI__builtin_riscv_ctz_64: {
22115 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
22116 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
22117 if (Result->getType() != ResultType)
22118 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
22119 "cast");
22120 return Result;
22121 }
22122
22123 // Zbc
22124 case RISCV::BI__builtin_riscv_clmul_32:
22125 case RISCV::BI__builtin_riscv_clmul_64:
22126 ID = Intrinsic::riscv_clmul;
22127 break;
22128 case RISCV::BI__builtin_riscv_clmulh_32:
22129 case RISCV::BI__builtin_riscv_clmulh_64:
22130 ID = Intrinsic::riscv_clmulh;
22131 break;
22132 case RISCV::BI__builtin_riscv_clmulr_32:
22133 case RISCV::BI__builtin_riscv_clmulr_64:
22134 ID = Intrinsic::riscv_clmulr;
22135 break;
22136
22137 // Zbkx
22138 case RISCV::BI__builtin_riscv_xperm8_32:
22139 case RISCV::BI__builtin_riscv_xperm8_64:
22140 ID = Intrinsic::riscv_xperm8;
22141 break;
22142 case RISCV::BI__builtin_riscv_xperm4_32:
22143 case RISCV::BI__builtin_riscv_xperm4_64:
22144 ID = Intrinsic::riscv_xperm4;
22145 break;
22146
22147 // Zbkb
22148 case RISCV::BI__builtin_riscv_brev8_32:
22149 case RISCV::BI__builtin_riscv_brev8_64:
22150 ID = Intrinsic::riscv_brev8;
22151 break;
22152 case RISCV::BI__builtin_riscv_zip_32:
22153 ID = Intrinsic::riscv_zip;
22154 break;
22155 case RISCV::BI__builtin_riscv_unzip_32:
22156 ID = Intrinsic::riscv_unzip;
22157 break;
22158 }
22159
22160 IntrinsicTypes = {ResultType};
22161 break;
22162 }
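// Editorial note (not in the upstream source): the clz/ctz cases above bypass
// RISC-V-specific intrinsics and emit the generic llvm.ctlz/llvm.cttz with
// is_zero_poison set to false, so the builtins remain well defined for a zero
// input; the result is then cast to the builtin's declared result type when
// the widths differ.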
22163
22164 // Zk builtins
22165
22166 // Zknh
22167 case RISCV::BI__builtin_riscv_sha256sig0:
22168 ID = Intrinsic::riscv_sha256sig0;
22169 break;
22170 case RISCV::BI__builtin_riscv_sha256sig1:
22171 ID = Intrinsic::riscv_sha256sig1;
22172 break;
22173 case RISCV::BI__builtin_riscv_sha256sum0:
22174 ID = Intrinsic::riscv_sha256sum0;
22175 break;
22176 case RISCV::BI__builtin_riscv_sha256sum1:
22177 ID = Intrinsic::riscv_sha256sum1;
22178 break;
22179
22180 // Zksed
22181 case RISCV::BI__builtin_riscv_sm4ks:
22182 ID = Intrinsic::riscv_sm4ks;
22183 break;
22184 case RISCV::BI__builtin_riscv_sm4ed:
22185 ID = Intrinsic::riscv_sm4ed;
22186 break;
22187
22188 // Zksh
22189 case RISCV::BI__builtin_riscv_sm3p0:
22190 ID = Intrinsic::riscv_sm3p0;
22191 break;
22192 case RISCV::BI__builtin_riscv_sm3p1:
22193 ID = Intrinsic::riscv_sm3p1;
22194 break;
22195
22196 // Zihintntl
22197 case RISCV::BI__builtin_riscv_ntl_load: {
22198 llvm::Type *ResTy = ConvertType(E->getType());
22199 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
22200 if (Ops.size() == 2)
22201 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
22202
22203 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
22204 getLLVMContext(),
22205 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
22206 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
22207 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
22208
22209 int Width;
22210 if (ResTy->isScalableTy()) {
22211 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
22212 llvm::Type *ScalarTy = ResTy->getScalarType();
22213 Width = ScalarTy->getPrimitiveSizeInBits() *
22214 SVTy->getElementCount().getKnownMinValue();
22215 } else
22216 Width = ResTy->getPrimitiveSizeInBits();
22217 LoadInst *Load = Builder.CreateLoad(
22218 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
22219
22220 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
22221 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
22222 RISCVDomainNode);
22223
22224 return Load;
22225 }
22226 case RISCV::BI__builtin_riscv_ntl_store: {
22227 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
22228 if (Ops.size() == 3)
22229 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
22230
22231 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
22232 getLLVMContext(),
22233 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
22234 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
22235 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
22236
22237 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
22238 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
22239 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
22240 RISCVDomainNode);
22241
22242 return Store;
22243 }
22244
22245 // Vector builtins are handled from here.
22246#include "clang/Basic/riscv_vector_builtin_cg.inc"
22247 // SiFive Vector builtins are handled from here.
22248#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
22249 }
22250
22251 assert(ID != Intrinsic::not_intrinsic);
22252
22253 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
22254 return Builder.CreateCall(F, Ops, "");
22255}
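// Editorial note (not in the upstream source): for the Zihintntl builtins
// handled above, the non-temporal hint is carried purely as metadata on an
// ordinary load or store. An illustrative (assumed) IR shape for
// __builtin_riscv_ntl_load with the default __RISCV_NTLH_ALL domain:
//
//   %v = load i32, ptr %p, align 4, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 5}
//
// The backend is then expected to pick the matching ntl.* hint instruction
// from the domain value.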
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3341
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8476
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9312
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:214
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1191
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6502
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:393
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2057
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2023
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6371
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2514
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9282
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:801
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9275
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7517
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9502
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7529
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7499
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8544
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2392
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:446
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:739
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:496
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6498
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7530
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1389
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7534
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:861
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:614
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2420
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:470
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:769
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9271
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7531
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9349
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6256
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9786
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1774
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7341
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6495
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:513
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:266
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9376
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1620
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1474
Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:590
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1252
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6507
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:660
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:651
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2250
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:809
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1328
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9338
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:550
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:757
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2304
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9788
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2443
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6330
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:257
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9364
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:185
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8455
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:75
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8447
@ VolatileRead
Definition: CGBuiltin.cpp:8449
@ NormalRead
Definition: CGBuiltin.cpp:8448
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:351
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:196
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2292
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:311
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:530
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:174
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9304
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7526
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6826
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:238
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:724
@ UnsignedAlts
Definition: CGBuiltin.cpp:6465
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6470
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6474
@ Use64BitVectors
Definition: CGBuiltin.cpp:6467
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6462
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6472
@ InventFloatType
Definition: CGBuiltin.cpp:6464
@ AddRetType
Definition: CGBuiltin.cpp:6457
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6459
@ VectorizeRetType
Definition: CGBuiltin.cpp:6461
@ VectorRet
Definition: CGBuiltin.cpp:6471
@ Add1ArgType
Definition: CGBuiltin.cpp:6458
@ Use128BitVectors
Definition: CGBuiltin.cpp:6468
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7592
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:602
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:632
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:482
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2525
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1201
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2258
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:569
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:689
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:249
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1237
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8374
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2017
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:457
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7528
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7101
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:143
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1146
IdentifierTable & Idents
Definition: ASTContext.h:660
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:662
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1128
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2117
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2675
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2394
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1119
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:779
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2296
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2299
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3566
QualType getElementType() const
Definition: Type.h:3578
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:251
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:122
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2830
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:855
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:895
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:912
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:135
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:142
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:291
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:387
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:379
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:179
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:403
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:150
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:396
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:168
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:107
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:314
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:435
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:363
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:127
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:188
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:260
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:344
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:298
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
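A hedged sketch of a call site (variable names illustrative): pass the raw va_list pointer and select llvm.va_start with IsStart=true, or llvm.va_end with IsStart=false.
llvm::Value *VAListPtr = CGF.EmitVAListRef(E->getArg(0)).emitRawPointer(CGF);
CGF.EmitVAStartEnd(VAListPtr, /*IsStart=*/true);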
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
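Source-level forms handled here, for context (a sketch; 64 is an arbitrary power-of-two alignment):
void *up   = __builtin_align_up(p, 64);    // round p up to a 64-byte boundary
void *down = __builtin_align_down(p, 64);  // round p down to a 64-byte boundary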
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
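The usual control-flow idiom built from createBasicBlock/EmitBlock, as a sketch (Cond and the block names are illustrative):
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
llvm::BasicBlock *ContBB = CGF.createBasicBlock("cont");
CGF.Builder.CreateCondBr(Cond, ThenBB, ContBB);
CGF.EmitBlock(ThenBB);
// ... emit the conditional work ...
CGF.Builder.CreateBr(ContBB);
CGF.EmitBlock(ContBB);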
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
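A small sketch (ArgE and ArgQT are illustrative): materialize a temporary and evaluate an expression into it.
RawAddress Tmp = CGF.CreateMemTemp(ArgQT, "builtin.tmp");
CGF.EmitAnyExprToMem(ArgE, Tmp, ArgQT.getQualifiers(), /*IsInitializer=*/true);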
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
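Conceptually the dispatch keys on the target triple; a hedged sketch of the shape (only two of the per-target emitters listed on this page are shown; BuiltinID and E are the parameters named in the signature above):
switch (CGF.getTarget().getTriple().getArch()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
  return CGF.EmitX86BuiltinExpr(BuiltinID, E);
case llvm::Triple::ppc:
case llvm::Triple::ppc64le:
  return CGF.EmitPPCBuiltinExpr(BuiltinID, E);
default:
  return nullptr;
}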
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
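A sketch of the rotate lowering (Clang's rotate builtins map onto the funnel-shift intrinsics; names here are illustrative, and the shift amount is widened to the value type first):
llvm::Value *Src = CGF.EmitScalarExpr(E->getArg(0));
llvm::Value *Amt = CGF.EmitScalarExpr(E->getArg(1));
Amt = CGF.Builder.CreateIntCast(Amt, Src->getType(), /*isSigned=*/false);
llvm::Function *F = CGF.CGM.getIntrinsic(
    IsRotateRight ? llvm::Intrinsic::fshr : llvm::Intrinsic::fshl, Src->getType());
return RValue::get(CGF.Builder.CreateCall(F, {Src, Src, Amt}));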
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
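Typical use, as a sketch (Addr, Ty and E are illustrative): load a scalar through an Address and let the helper apply the memory-to-value conversion.
llvm::Value *V = CGF.EmitLoadOfScalar(Addr, /*Volatile=*/false, Ty, E->getExprLoc());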
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
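A hedged sketch of declaring and calling a runtime helper (the name __example_runtime_fn is made up for illustration):
llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, /*isVarArg=*/false);
llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__example_runtime_fn");
CGF.EmitNounwindRuntimeCall(Fn);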
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2879
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:100
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1607
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:668
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:368
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
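A minimal sketch of the RValue round trip:
RValue RV = RValue::get(CGF.Builder.getInt32(0));   // wrap a scalar llvm::Value
llvm::Value *V = RV.getScalarVal();                 // unwrap it again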
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:372
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:152
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:142
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:124
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:161
Complex values, per C99 6.2.5p11.
Definition: Type.h:3134
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4219
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2014
T * getAttr() const
Definition: DeclBase.h:580
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:600
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:249
DeclContext * getDeclContext()
Definition: DeclBase.h:455
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:435
bool hasAttr() const
Definition: DeclBase.h:584
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3075
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3070
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3066
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3567
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3050
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3941
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
Represents difference between two FPOptions values.
Definition: LangOptions.h:947
Represents a member of a struct/union/class.
Definition: Decl.h:3030
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4681
Represents a function declaration or definition.
Definition: Decl.h:1932
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2669
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3618
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5002
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5378
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL 2.0 pipe type.
Definition: Type.h:7599
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3187
QualType getPointeeType() const
Definition: Type.h:3197
A (possibly-)qualified type.
Definition: Type.h:941
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:7834
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2848
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7876
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2844
The collection of all-type qualifiers we support.
Definition: Type.h:319
Represents a struct/union/class.
Definition: Decl.h:4145
field_range fields() const
Definition: Decl.h:4351
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
bool isUnion() const
Definition: Decl.h:3767
Exposes information about the current target.
Definition: TargetInfo.h:218
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:312
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:687
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1256
bool isLittleEndian() const
Definition: TargetInfo.h:1666
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:851
bool isBigEndian() const
Definition: TargetInfo.h:1665
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1672
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:723
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1829
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1882
bool isBlockPointerType() const
Definition: Type.h:8017
bool isVoidType() const
Definition: Type.h:8319
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:677
bool isArrayType() const
Definition: Type.h:8075
bool isCountAttributedType() const
Definition: Type.cpp:694
bool isPointerType() const
Definition: Type.h:8003
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8359
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8607
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8434
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2236
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2186
bool isObjCObjectPointerType() const
Definition: Type.h:8145
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2258
bool isVectorType() const
Definition: Type.h:8115
bool isFloatingType() const
Definition: Type.cpp:2249
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:8540
bool isRecordType() const
Definition: Type.h:8103
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2479
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2536
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1886
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:667
QualType getType() const
Definition: Decl.h:678
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4021
unsigned getNumElements() const
Definition: Type.h:4036
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1144
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:275
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2242
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:407
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:670
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1665
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2051
The JSON file list parser is used to communicate input to InstallAPI.
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
unsigned long uint64_t
long int64_t
int int32_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
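Illustrative use inside a builtin emitter (SanitizerKind::Builtin is one real mask; the check body is elided):
if (CGF.SanOpts.has(SanitizerKind::Builtin)) {
  // ... emit the extra argument check here ...
}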
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742