1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CodeGenFunction.h"
21#include "CodeGenModule.h"
22#include "ConstantEmitter.h"
23#include "PatternInit.h"
24#include "TargetInfo.h"
26#include "clang/AST/Attr.h"
27#include "clang/AST/Decl.h"
28#include "clang/AST/OSLog.h"
35#include "llvm/ADT/APFloat.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/FloatingPointMode.h"
38#include "llvm/ADT/SmallPtrSet.h"
39#include "llvm/ADT/StringExtras.h"
40#include "llvm/Analysis/ValueTracking.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Intrinsics.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/IntrinsicsAMDGPU.h"
46#include "llvm/IR/IntrinsicsARM.h"
47#include "llvm/IR/IntrinsicsBPF.h"
48#include "llvm/IR/IntrinsicsDirectX.h"
49#include "llvm/IR/IntrinsicsHexagon.h"
50#include "llvm/IR/IntrinsicsNVPTX.h"
51#include "llvm/IR/IntrinsicsPowerPC.h"
52#include "llvm/IR/IntrinsicsR600.h"
53#include "llvm/IR/IntrinsicsRISCV.h"
54#include "llvm/IR/IntrinsicsS390.h"
55#include "llvm/IR/IntrinsicsVE.h"
56#include "llvm/IR/IntrinsicsWebAssembly.h"
57#include "llvm/IR/IntrinsicsX86.h"
58#include "llvm/IR/MDBuilder.h"
59#include "llvm/IR/MatrixBuilder.h"
60#include "llvm/Support/ConvertUTF.h"
61#include "llvm/Support/MathExtras.h"
62#include "llvm/Support/ScopedPrinter.h"
63#include "llvm/TargetParser/AArch64TargetParser.h"
64#include "llvm/TargetParser/X86TargetParser.h"
65#include <optional>
66#include <sstream>
67
68using namespace clang;
69using namespace CodeGen;
70using namespace llvm;
71
72static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
73 Align AlignmentInBytes) {
74 ConstantInt *Byte;
75 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
76 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
77 // Nothing to initialize.
78 return;
79 case LangOptions::TrivialAutoVarInitKind::Zero:
80 Byte = CGF.Builder.getInt8(0x00);
81 break;
82 case LangOptions::TrivialAutoVarInitKind::Pattern: {
83 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
84 Byte = llvm::dyn_cast<llvm::ConstantInt>(
85 initializationPatternFor(CGF.CGM, Int8));
86 break;
87 }
88 }
89 if (CGF.CGM.stopAutoInit())
90 return;
91 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
92 I->addAnnotationMetadata("auto-init");
93}
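// Illustrative sketch (not part of the upstream source): with
// -ftrivial-auto-var-init=zero, a call such as
//   void *p = __builtin_alloca(n);
// has its storage initialized by initializeAlloca, emitting roughly
//   %p = alloca i8, i64 %n
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
// where the memset carries "auto-init" annotation metadata.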
94
95/// getBuiltinLibFunction - Given a builtin id for a function like
96/// "__builtin_fabsf", return a Function* for "fabsf".
97llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
98 unsigned BuiltinID) {
99 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
100
101 // Get the name, skip over the __builtin_ prefix (if necessary).
102 StringRef Name;
103 GlobalDecl D(FD);
104
105 // TODO: This list should be expanded or refactored after all GCC-compatible
106 // std libcall builtins are implemented.
107 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
108 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
109 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
110 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
111 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
112 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
113 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
114 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
115 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
116 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
117 {Builtin::BI__builtin_printf, "__printfieee128"},
118 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
119 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
120 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
121 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
122 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
123 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
124 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
125 {Builtin::BI__builtin_scanf, "__scanfieee128"},
126 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
127 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
128 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
129 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
130 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
131 };
132
133 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
134 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
135 // if it is 64-bit 'long double' mode.
136 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
137 {Builtin::BI__builtin_frexpl, "frexp"},
138 {Builtin::BI__builtin_ldexpl, "ldexp"},
139 {Builtin::BI__builtin_modfl, "modf"},
140 };
141
142 // If the builtin has been declared explicitly with an assembler label,
143 // use the mangled name. This differs from the plain label on platforms
144 // that prefix labels.
145 if (FD->hasAttr<AsmLabelAttr>())
146 Name = getMangledName(D);
147 else {
148 // TODO: This mutation should also be applied to targets other than PPC,
149 // once the backend supports IEEE 128-bit style libcalls.
150 if (getTriple().isPPC64() &&
151 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
152 F128Builtins.contains(BuiltinID))
153 Name = F128Builtins[BuiltinID];
154 else if (getTriple().isOSAIX() &&
155 &getTarget().getLongDoubleFormat() ==
156 &llvm::APFloat::IEEEdouble() &&
157 AIXLongDouble64Builtins.contains(BuiltinID))
158 Name = AIXLongDouble64Builtins[BuiltinID];
159 else
160 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
161 }
162
163 llvm::FunctionType *Ty =
164 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
165
166 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
167}
168
169/// Emit the conversions required to turn the given value into an
170/// integer of the given size.
171static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
172 QualType T, llvm::IntegerType *IntType) {
173 V = CGF.EmitToMemory(V, T);
174
175 if (V->getType()->isPointerTy())
176 return CGF.Builder.CreatePtrToInt(V, IntType);
177
178 assert(V->getType() == IntType);
179 return V;
180}
181
182static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
183 QualType T, llvm::Type *ResultType) {
184 V = CGF.EmitFromMemory(V, T);
185
186 if (ResultType->isPointerTy())
187 return CGF.Builder.CreateIntToPtr(V, ResultType);
188
189 assert(V->getType() == ResultType);
190 return V;
191}
192
193static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
194 ASTContext &Ctx = CGF.getContext();
195 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
196 unsigned Bytes = Ptr.getElementType()->isPointerTy()
197 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
198 : Ptr.getElementType()->getScalarSizeInBits() / 8;
199 unsigned Align = Ptr.getAlignment().getQuantity();
200 if (Align % Bytes != 0) {
201 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
202 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
203 // Force address to be at least naturally-aligned.
204 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
205 }
206 return Ptr;
207}
208
209/// Utility to insert an atomic instruction based on Intrinsic::ID
210/// and the expression node.
211static Value *MakeBinaryAtomicValue(
212 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
213 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
214
215 QualType T = E->getType();
216 assert(E->getArg(0)->getType()->isPointerType());
217 assert(CGF.getContext().hasSameUnqualifiedType(T,
218 E->getArg(0)->getType()->getPointeeType()));
219 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
220
221 Address DestAddr = CheckAtomicAlignment(CGF, E);
222
223 llvm::IntegerType *IntType = llvm::IntegerType::get(
224 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
225
226 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
227 llvm::Type *ValueType = Val->getType();
228 Val = EmitToInt(CGF, Val, T, IntType);
229
230 llvm::Value *Result =
231 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
232 return EmitFromInt(CGF, Result, T, ValueType);
233}
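// Illustrative sketch (not part of the upstream source): a call such as
//   __sync_fetch_and_add(&counter, 5)   // counter is an int
// reaches MakeBinaryAtomicValue with Kind == AtomicRMWInst::Add and lowers to
// roughly
//   %old = atomicrmw add ptr %counter, i32 5 seq_cst
// with %old (the pre-increment value) converted back to the source type.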
234
235static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
236 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
237 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
238
239 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
240 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
241 LV.setNontemporal(true);
242 CGF.EmitStoreOfScalar(Val, LV, false);
243 return nullptr;
244}
245
246static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
247 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
248
249 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
250 LV.setNontemporal(true);
251 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
252}
253
254static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
255 llvm::AtomicRMWInst::BinOp Kind,
256 const CallExpr *E) {
257 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
258}
259
260/// Utility to insert an atomic instruction based Intrinsic::ID and
261/// the expression node, where the return value is the result of the
262/// operation.
263static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
264 llvm::AtomicRMWInst::BinOp Kind,
265 const CallExpr *E,
266 Instruction::BinaryOps Op,
267 bool Invert = false) {
268 QualType T = E->getType();
269 assert(E->getArg(0)->getType()->isPointerType());
270 assert(CGF.getContext().hasSameUnqualifiedType(T,
271 E->getArg(0)->getType()->getPointeeType()));
272 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
273
274 Address DestAddr = CheckAtomicAlignment(CGF, E);
275
276 llvm::IntegerType *IntType = llvm::IntegerType::get(
277 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
278
279 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
280 llvm::Type *ValueType = Val->getType();
281 Val = EmitToInt(CGF, Val, T, IntType);
282
283 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
284 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
285 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
286 if (Invert)
287 Result =
288 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
289 llvm::ConstantInt::getAllOnesValue(IntType));
290 Result = EmitFromInt(CGF, Result, T, ValueType);
291 return RValue::get(Result);
292}
293
294/// Utility to insert an atomic cmpxchg instruction.
295///
296/// @param CGF The current codegen function.
297/// @param E Builtin call expression to convert to cmpxchg.
298/// arg0 - address to operate on
299/// arg1 - value to compare with
300/// arg2 - new value
301/// @param ReturnBool Specifies whether to return success flag of
302/// cmpxchg result or the old value.
303///
304/// @returns result of cmpxchg, according to ReturnBool
305///
306/// Note: to lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
307/// the function EmitAtomicCmpXchgForMSIntrin instead.
308static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
309 bool ReturnBool) {
310 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
311 Address DestAddr = CheckAtomicAlignment(CGF, E);
312
313 llvm::IntegerType *IntType = llvm::IntegerType::get(
314 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
315
316 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
317 llvm::Type *ValueType = Cmp->getType();
318 Cmp = EmitToInt(CGF, Cmp, T, IntType);
319 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
320
321 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
322 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
323 llvm::AtomicOrdering::SequentiallyConsistent);
324 if (ReturnBool)
325 // Extract boolean success flag and zext it to int.
326 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
327 CGF.ConvertType(E->getType()));
328 else
329 // Extract old value and emit it using the same type as compare value.
330 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
331 ValueType);
332}
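// Illustrative sketch (not part of the upstream source):
//   __sync_bool_compare_and_swap(&x, old, new)      // ReturnBool == true
// lowers to roughly
//   %pair = cmpxchg ptr %x, i32 %old, i32 %new seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1
//   %res  = zext i1 %ok to i32
// while __sync_val_compare_and_swap (ReturnBool == false) instead extracts
// element 0, the value that was in memory before the exchange.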
333
334/// This function should be invoked to emit atomic cmpxchg for Microsoft's
335/// _InterlockedCompareExchange* intrinsics which have the following signature:
336/// T _InterlockedCompareExchange(T volatile *Destination,
337/// T Exchange,
338/// T Comparand);
339///
340/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
341/// cmpxchg *Destination, Comparand, Exchange.
342/// So we need to swap Comparand and Exchange when invoking
343/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
344/// function MakeAtomicCmpXchgValue since it expects the arguments to be
345/// already swapped.
346
347static
348Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
349 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
350 assert(E->getArg(0)->getType()->isPointerType());
351 assert(CGF.getContext().hasSameUnqualifiedType(
352 E->getType(), E->getArg(0)->getType()->getPointeeType()));
353 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
354 E->getArg(1)->getType()));
355 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
356 E->getArg(2)->getType()));
357
358 Address DestAddr = CheckAtomicAlignment(CGF, E);
359
360 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
361 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
362
363 // For Release ordering, the failure ordering should be Monotonic.
364 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
365 AtomicOrdering::Monotonic :
366 SuccessOrdering;
367
368 // The atomic instruction is marked volatile for consistency with MSVC. This
369 // blocks the few atomics optimizations that LLVM has. If we want to optimize
370 // _Interlocked* operations in the future, we will have to remove the volatile
371 // marker.
372 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
373 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
374 Result->setVolatile(true);
375 return CGF.Builder.CreateExtractValue(Result, 0);
376}
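// Illustrative sketch (not part of the upstream source): for
//   _InterlockedCompareExchange(&x, Exchange, Comparand)
// the operands are swapped into cmpxchg order, yielding roughly
//   %pair = cmpxchg volatile ptr %x, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %old  = extractvalue { i32, i1 } %pair, 0
// and %old, the previous value of *Destination, is returned.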
377
378// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
379// prototyped like this:
380//
381// unsigned char _InterlockedCompareExchange128...(
382// __int64 volatile * _Destination,
383// __int64 _ExchangeHigh,
384// __int64 _ExchangeLow,
385// __int64 * _ComparandResult);
386//
387// Note that Destination is assumed to be at least 16-byte aligned, despite
388// being typed int64.
389
390static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
391 const CallExpr *E,
392 AtomicOrdering SuccessOrdering) {
393 assert(E->getNumArgs() == 4);
394 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
395 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
396 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
397 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
398
399 assert(DestPtr->getType()->isPointerTy());
400 assert(!ExchangeHigh->getType()->isPointerTy());
401 assert(!ExchangeLow->getType()->isPointerTy());
402
403 // For Release ordering, the failure ordering should be Monotonic.
404 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
405 ? AtomicOrdering::Monotonic
406 : SuccessOrdering;
407
408 // Convert to i128 pointers and values. Alignment is also overridden for
409 // destination pointer.
410 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
411 Address DestAddr(DestPtr, Int128Ty,
412 CGF.getContext().toCharUnitsFromBits(128));
413 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
414
415 // (((i128)hi) << 64) | ((i128)lo)
416 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
417 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
418 ExchangeHigh =
419 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
420 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
421
422 // Load the comparand for the instruction.
423 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
424
425 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
426 SuccessOrdering, FailureOrdering);
427
428 // The atomic instruction is marked volatile for consistency with MSVC. This
429 // blocks the few atomics optimizations that LLVM has. If we want to optimize
430 // _Interlocked* operations in the future, we will have to remove the volatile
431 // marker.
432 CXI->setVolatile(true);
433
434 // Store the result as an outparameter.
435 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
436 ComparandAddr);
437
438 // Get the success boolean and zero extend it to i8.
439 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
440 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
441}
442
443static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
444 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
445 assert(E->getArg(0)->getType()->isPointerType());
446
447 auto *IntTy = CGF.ConvertType(E->getType());
448 Address DestAddr = CheckAtomicAlignment(CGF, E);
449 auto *Result = CGF.Builder.CreateAtomicRMW(
450 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
451 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
452}
453
454static Value *EmitAtomicDecrementValue(
455 CodeGenFunction &CGF, const CallExpr *E,
456 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
457 assert(E->getArg(0)->getType()->isPointerType());
458
459 auto *IntTy = CGF.ConvertType(E->getType());
460 Address DestAddr = CheckAtomicAlignment(CGF, E);
461 auto *Result = CGF.Builder.CreateAtomicRMW(
462 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
463 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
464}
465
466// Build a plain volatile load.
467static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
468 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
469 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
470 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
471 llvm::Type *ITy =
472 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
473 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
474 Load->setVolatile(true);
475 return Load;
476}
477
478// Build a plain volatile store.
479static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
480 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
481 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
482 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
483 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
484 llvm::StoreInst *Store =
485 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
486 Store->setVolatile(true);
487 return Store;
488}
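// Illustrative sketch (not part of the upstream source):
//   __iso_volatile_store32(p, v);
// is emitted by EmitISOVolatileStore as a plain volatile store of an integer
// whose width matches the pointee size, roughly
//   store volatile i32 %v, ptr %p, align 4
// and __iso_volatile_load32(p) similarly becomes a volatile i32 load.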
489
490// Emit a simple mangled intrinsic that has 1 argument and a return type
491// matching the argument type. Depending on mode, this may be a constrained
492// floating-point intrinsic.
493static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
494 const CallExpr *E, unsigned IntrinsicID,
495 unsigned ConstrainedIntrinsicID) {
496 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
497
498 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
499 if (CGF.Builder.getIsFPConstrained()) {
500 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
501 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
502 } else {
503 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
504 return CGF.Builder.CreateCall(F, Src0);
505 }
506}
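// Illustrative sketch (not part of the upstream source): for
//   double r = __builtin_sqrt(x);
// this helper emits either
//   %r = call double @llvm.sqrt.f64(double %x)
// or, when strict floating-point ("constrained") mode is in effect, roughly
//   %r = call double @llvm.experimental.constrained.sqrt.f64(double %x,
//            metadata !"round.dynamic", metadata !"fpexcept.strict")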
507
508// Emit an intrinsic that has 2 operands of the same type as its result.
509// Depending on mode, this may be a constrained floating-point intrinsic.
510static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
511 const CallExpr *E, unsigned IntrinsicID,
512 unsigned ConstrainedIntrinsicID) {
513 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
514 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
515
516 if (CGF.Builder.getIsFPConstrained()) {
517 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
518 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
519 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
520 } else {
521 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
522 return CGF.Builder.CreateCall(F, { Src0, Src1 });
523 }
524}
525
526// Has second type mangled argument.
527static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
528 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
529 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
530 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
531 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
532
533 if (CGF.Builder.getIsFPConstrained()) {
534 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
535 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
536 {Src0->getType(), Src1->getType()});
537 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
538 }
539
540 Function *F =
541 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
542 return CGF.Builder.CreateCall(F, {Src0, Src1});
543}
544
545// Emit an intrinsic that has 3 operands of the same type as its result.
546// Depending on mode, this may be a constrained floating-point intrinsic.
547static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
548 const CallExpr *E, unsigned IntrinsicID,
549 unsigned ConstrainedIntrinsicID) {
550 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
551 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
552 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
553
554 if (CGF.Builder.getIsFPConstrained()) {
555 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
556 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
557 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
558 } else {
559 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
560 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
561 }
562}
563
564// Emit an intrinsic where all operands are of the same type as the result.
565// Depending on mode, this may be a constrained floating-point intrinsic.
566static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
567 unsigned IntrinsicID,
568 unsigned ConstrainedIntrinsicID,
569 llvm::Type *Ty,
570 ArrayRef<Value *> Args) {
571 Function *F;
572 if (CGF.Builder.getIsFPConstrained())
573 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
574 else
575 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
576
577 if (CGF.Builder.getIsFPConstrained())
578 return CGF.Builder.CreateConstrainedFPCall(F, Args);
579 else
580 return CGF.Builder.CreateCall(F, Args);
581}
582
583// Emit a simple mangled intrinsic that has 1 argument and a return type
584// matching the argument type.
585static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
586 unsigned IntrinsicID,
587 llvm::StringRef Name = "") {
588 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
589
590 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
591 return CGF.Builder.CreateCall(F, Src0, Name);
592}
593
594// Emit an intrinsic that has 2 operands of the same type as its result.
595static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
596 const CallExpr *E,
597 unsigned IntrinsicID) {
598 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
599 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
600
601 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
602 return CGF.Builder.CreateCall(F, { Src0, Src1 });
603}
604
605// Emit an intrinsic that has 3 operands of the same type as its result.
606static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
607 const CallExpr *E,
608 unsigned IntrinsicID) {
609 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
610 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
611 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
612
613 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
614 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
615}
616
617// Emit an intrinsic that has 1 float or double operand, and 1 integer.
618static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
619 const CallExpr *E,
620 unsigned IntrinsicID) {
621 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
622 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
623
624 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
625 return CGF.Builder.CreateCall(F, {Src0, Src1});
626}
627
628// Emit an intrinsic that has overloaded integer result and fp operand.
629static Value *
630emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
631 unsigned IntrinsicID,
632 unsigned ConstrainedIntrinsicID) {
633 llvm::Type *ResultType = CGF.ConvertType(E->getType());
634 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
635
636 if (CGF.Builder.getIsFPConstrained()) {
637 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
638 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
639 {ResultType, Src0->getType()});
640 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
641 } else {
642 Function *F =
643 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
644 return CGF.Builder.CreateCall(F, Src0);
645 }
646}
647
648static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
649 llvm::Intrinsic::ID IntrinsicID) {
650 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
651 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
652
653 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
654 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
655 llvm::Function *F =
656 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
657 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
658
659 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
660 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
661 CGF.EmitStoreOfScalar(Exp, LV);
662
663 return CGF.Builder.CreateExtractValue(Call, 0);
664}
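// Illustrative sketch (not part of the upstream source):
//   double m = __builtin_frexp(x, &e);
// lowers through the llvm.frexp intrinsic, which returns both results in a
// struct, roughly
//   %pair = call { double, i32 } @llvm.frexp.f64.i32(double %x)
//   %exp  = extractvalue { double, i32 } %pair, 1   ; stored through &e
//   %m    = extractvalue { double, i32 } %pair, 0   ; returned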
665
666/// EmitFAbs - Emit a call to @llvm.fabs().
667static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
668 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
669 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
670 Call->setDoesNotAccessMemory();
671 return Call;
672}
673
674/// Emit the computation of the sign bit for a floating point value. Returns
675/// the i1 sign bit value.
676static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
677 LLVMContext &C = CGF.CGM.getLLVMContext();
678
679 llvm::Type *Ty = V->getType();
680 int Width = Ty->getPrimitiveSizeInBits();
681 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
682 V = CGF.Builder.CreateBitCast(V, IntTy);
683 if (Ty->isPPC_FP128Ty()) {
684 // We want the sign bit of the higher-order double. The bitcast we just
685 // did works as if the double-double was stored to memory and then
686 // read as an i128. The "store" will put the higher-order double in the
687 // lower address in both little- and big-Endian modes, but the "load"
688 // will treat those bits as a different part of the i128: the low bits in
689 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
690 // we need to shift the high bits down to the low before truncating.
691 Width >>= 1;
692 if (CGF.getTarget().isBigEndian()) {
693 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
694 V = CGF.Builder.CreateLShr(V, ShiftCst);
695 }
696 // We are truncating value in order to extract the higher-order
697 // double, which we will be using to extract the sign from.
698 IntTy = llvm::IntegerType::get(C, Width);
699 V = CGF.Builder.CreateTrunc(V, IntTy);
700 }
701 Value *Zero = llvm::Constant::getNullValue(IntTy);
702 return CGF.Builder.CreateICmpSLT(V, Zero);
703}
704
705static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
706 const CallExpr *E, llvm::Constant *calleeValue) {
707 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
708 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
709}
710
711/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
712/// depending on IntrinsicID.
713///
714/// \arg CGF The current codegen function.
715/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
716/// \arg X The first argument to the llvm.*.with.overflow.*.
717/// \arg Y The second argument to the llvm.*.with.overflow.*.
718/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
719/// \returns The result (i.e. sum/product) returned by the intrinsic.
720static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
721 const llvm::Intrinsic::ID IntrinsicID,
722 llvm::Value *X, llvm::Value *Y,
723 llvm::Value *&Carry) {
724 // Make sure we have integers of the same width.
725 assert(X->getType() == Y->getType() &&
726 "Arguments must be the same type. (Did you forget to make sure both "
727 "arguments have the same integer width?)");
728
729 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
730 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
731 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
732 return CGF.Builder.CreateExtractValue(Tmp, 0);
733}
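// Illustrative sketch (not part of the upstream source): for
//   bool ovf = __builtin_sadd_overflow(a, b, &sum);   // int operands
// EmitOverflowIntrinsic is invoked with llvm.sadd.with.overflow and produces
// roughly
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %pair, 0
//   %ovf  = extractvalue { i32, i1 } %pair, 1   ; returned via the Carry out-param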
734
735static Value *emitRangedBuiltin(CodeGenFunction &CGF,
736 unsigned IntrinsicID,
737 int low, int high) {
738 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
739 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
740 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
741 llvm::Instruction *Call = CGF.Builder.CreateCall(F);
742 Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
743 Call->setMetadata(llvm::LLVMContext::MD_noundef,
744 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
745 return Call;
746}
747
748namespace {
749 struct WidthAndSignedness {
750 unsigned Width;
751 bool Signed;
752 };
753}
754
755static WidthAndSignedness
756getIntegerWidthAndSignedness(const clang::ASTContext &context,
757 const clang::QualType Type) {
758 assert(Type->isIntegerType() && "Given type is not an integer.");
759 unsigned Width = Type->isBooleanType() ? 1
760 : Type->isBitIntType() ? context.getIntWidth(Type)
761 : context.getTypeInfo(Type).Width;
762 bool Signed = Type->isSignedIntegerType();
763 return {Width, Signed};
764}
765
766// Given one or more integer types, this function produces an integer type that
767// encompasses them: any value in one of the given types could be expressed in
768// the encompassing type.
769static struct WidthAndSignedness
770EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
771 assert(Types.size() > 0 && "Empty list of types.");
772
773 // If any of the given types is signed, we must return a signed type.
774 bool Signed = false;
775 for (const auto &Type : Types) {
776 Signed |= Type.Signed;
777 }
778
779 // The encompassing type must have a width greater than or equal to the width
780 // of the specified types. Additionally, if the encompassing type is signed,
781 // its width must be strictly greater than the width of any unsigned types
782 // given.
783 unsigned Width = 0;
784 for (const auto &Type : Types) {
785 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
786 if (Width < MinWidth) {
787 Width = MinWidth;
788 }
789 }
790
791 return {Width, Signed};
792}
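// Worked example (not part of the upstream source): given
//   {Width = 32, Signed = false} and {Width = 16, Signed = true}
// the result must be signed, and the unsigned 32-bit member then needs one
// extra bit, so EncompassingIntegerType returns {Width = 33, Signed = true}.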
793
794Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
795 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
796 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
797 ArgValue);
798}
799
800/// Checks if using the result of __builtin_object_size(p, @p From) in place of
801/// __builtin_object_size(p, @p To) is correct
802static bool areBOSTypesCompatible(int From, int To) {
803 // Note: Our __builtin_object_size implementation currently treats Type=0 and
804 // Type=2 identically. Encoding this implementation detail here may make
805 // improving __builtin_object_size difficult in the future, so it's omitted.
806 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
807}
808
809static llvm::Value *
810getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
811 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
812}
813
814llvm::Value *
815CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
816 llvm::IntegerType *ResType,
817 llvm::Value *EmittedE,
818 bool IsDynamic) {
819 uint64_t ObjectSize;
820 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
821 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
822 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
823}
824
825const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
826 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
827 uint64_t &Offset) {
828 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
829 getLangOpts().getStrictFlexArraysLevel();
830 uint32_t FieldNo = 0;
831
832 if (RD->isImplicit())
833 return nullptr;
834
835 for (const FieldDecl *FD : RD->fields()) {
836 if ((!FAMDecl || FD == FAMDecl) &&
837 Decl::isFlexibleArrayMemberLike(
838 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
839 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
840 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
841 Offset += Layout.getFieldOffset(FieldNo);
842 return FD;
843 }
844
845 QualType Ty = FD->getType();
846 if (Ty->isRecordType()) {
847 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
848 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
849 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
850 Offset += Layout.getFieldOffset(FieldNo);
851 return Field;
852 }
853 }
854
855 if (!RD->isUnion())
856 ++FieldNo;
857 }
858
859 return nullptr;
860}
861
862static unsigned CountCountedByAttrs(const RecordDecl *RD) {
863 unsigned Num = 0;
864
865 for (const FieldDecl *FD : RD->fields()) {
866 if (FD->getType()->isCountAttributedType())
867 return ++Num;
868
869 QualType Ty = FD->getType();
870 if (Ty->isRecordType())
871 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
872 }
873
874 return Num;
875}
876
877llvm::Value *
878CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
879 llvm::IntegerType *ResType) {
880 // The code generated here calculates the size of a struct with a flexible
881 // array member that uses the counted_by attribute. There are three cases
882 // we handle:
883 //
884 // struct s {
885 // unsigned long flags;
886 // int count;
887 // int array[] __attribute__((counted_by(count)));
888 // }
889 //
890 // 1) bdos of the flexible array itself:
891 //
892 // __builtin_dynamic_object_size(p->array, 1) ==
893 // p->count * sizeof(*p->array)
894 //
895 // 2) bdos of a pointer into the flexible array:
896 //
897 // __builtin_dynamic_object_size(&p->array[42], 1) ==
898 // (p->count - 42) * sizeof(*p->array)
899 //
900 // 3) bdos of the whole struct, including the flexible array:
901 //
902 // __builtin_dynamic_object_size(p, 1) ==
903 // max(sizeof(struct s),
904 // offsetof(struct s, array) + p->count * sizeof(*p->array))
905 //
906 ASTContext &Ctx = getContext();
907 const Expr *Base = E->IgnoreParenImpCasts();
908 const Expr *Idx = nullptr;
909
910 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
911 UO && UO->getOpcode() == UO_AddrOf) {
912 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
913 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
914 Base = ASE->getBase()->IgnoreParenImpCasts();
915 Idx = ASE->getIdx()->IgnoreParenImpCasts();
916
917 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
918 int64_t Val = IL->getValue().getSExtValue();
919 if (Val < 0)
920 return getDefaultBuiltinObjectSizeResult(Type, ResType);
921
922 if (Val == 0)
923 // The index is 0, so we don't need to take it into account.
924 Idx = nullptr;
925 }
926 } else {
927 // Potential pointer to another element in the struct.
928 Base = SubExpr;
929 }
930 }
931
932 // Get the flexible array member Decl.
933 const RecordDecl *OuterRD = nullptr;
934 const FieldDecl *FAMDecl = nullptr;
935 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
936 // Check if \p Base is referencing the FAM itself.
937 const ValueDecl *VD = ME->getMemberDecl();
938 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
939 FAMDecl = dyn_cast<FieldDecl>(VD);
940 if (!FAMDecl)
941 return nullptr;
942 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
943 // Check if we're pointing to the whole struct.
944 QualType Ty = DRE->getDecl()->getType();
945 if (Ty->isPointerType())
946 Ty = Ty->getPointeeType();
947 OuterRD = Ty->getAsRecordDecl();
948
949 // If we have a situation like this:
950 //
951 // struct union_of_fams {
952 // int flags;
953 // union {
954 // signed char normal_field;
955 // struct {
956 // int count1;
957 // int arr1[] __counted_by(count1);
958 // };
959 // struct {
960 // signed char count2;
961 // int arr2[] __counted_by(count2);
962 // };
963 // };
964 // };
965 //
966 // We don't know which 'count' to use in this scenario:
967 //
968 // size_t get_size(struct union_of_fams *p) {
969 // return __builtin_dynamic_object_size(p, 1);
970 // }
971 //
972 // Instead of calculating a wrong number, we give up.
973 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
974 return nullptr;
975 }
976
977 if (!OuterRD)
978 return nullptr;
979
980 // We call FindFlexibleArrayMemberFieldAndOffset even if FAMDecl is non-null
981 // to get its offset.
982 uint64_t Offset = 0;
983 FAMDecl =
984 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
985 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
986
987 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
988 // No flexible array member found or it doesn't have the "counted_by"
989 // attribute.
990 return nullptr;
991
992 const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
993 if (!CountedByFD)
994 // Can't find the field referenced by the "counted_by" attribute.
995 return nullptr;
996
997 // Build a load of the counted_by field.
998 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
999 Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
1000 if (!CountedByInst)
1001 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1002
1003 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1004
1005 // Build a load of the index and subtract it from the count.
1006 Value *IdxInst = nullptr;
1007 if (Idx) {
1008 if (Idx->HasSideEffects(getContext()))
1009 // We can't have side-effects.
1010 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1011
1012 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1013 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1014 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1015
1016 // We go ahead with the calculation here. If the index turns out to be
1017 // negative, we'll catch it at the end.
1018 CountedByInst =
1019 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1020 }
1021
1022 // Calculate how large the flexible array member is in bytes.
1023 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1024 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1025 llvm::Constant *ElemSize =
1026 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1027 Value *FAMSize =
1028 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1029 FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1030 Value *Res = FAMSize;
1031
1032 if (isa<DeclRefExpr>(Base)) {
1033 // The whole struct is specified in the __bdos.
1034 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1035
1036 // Get the offset of the FAM.
1037 llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1038 Value *OffsetAndFAMSize =
1039 Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1040
1041 // Get the full size of the struct.
1042 llvm::Constant *SizeofStruct =
1043 ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1044
1045 // max(sizeof(struct s),
1046 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1047 Res = IsSigned
1048 ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1049 OffsetAndFAMSize, SizeofStruct)
1050 : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1051 OffsetAndFAMSize, SizeofStruct);
1052 }
1053
1054 // A negative \p IdxInst or \p CountedByInst means that the index lands
1055 // outside of the flexible array member. If that's the case, we want to
1056 // return 0.
1057 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1058 if (IdxInst)
1059 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1060
1061 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1062}
1063
1064/// Returns a Value corresponding to the size of the given expression.
1065/// This Value may be either of the following:
1066/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1067/// it)
1068/// - A call to the @llvm.objectsize intrinsic
1069///
1070/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1071/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1072/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1073llvm::Value *
1074CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1075 llvm::IntegerType *ResType,
1076 llvm::Value *EmittedE, bool IsDynamic) {
1077 // We need to reference an argument if the pointer is a parameter with the
1078 // pass_object_size attribute.
1079 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1080 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1081 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1082 if (Param != nullptr && PS != nullptr &&
1083 areBOSTypesCompatible(PS->getType(), Type)) {
1084 auto Iter = SizeArguments.find(Param);
1085 assert(Iter != SizeArguments.end());
1086
1087 const ImplicitParamDecl *D = Iter->second;
1088 auto DIter = LocalDeclMap.find(D);
1089 assert(DIter != LocalDeclMap.end());
1090
1091 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1092 getContext().getSizeType(), E->getBeginLoc());
1093 }
1094 }
1095
1096 if (IsDynamic) {
1097 // Emit special code for a flexible array member with the "counted_by"
1098 // attribute.
1099 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1100 return V;
1101 }
1102
1103 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1104 // evaluate E for side-effects. In either case, we shouldn't lower to
1105 // @llvm.objectsize.
1106 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1107 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1108
1109 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1110 assert(Ptr->getType()->isPointerTy() &&
1111 "Non-pointer passed to __builtin_object_size?");
1112
1113 Function *F =
1114 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1115
1116 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1117 Value *Min = Builder.getInt1((Type & 2) != 0);
1118 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1119 Value *NullIsUnknown = Builder.getTrue();
1120 Value *Dynamic = Builder.getInt1(IsDynamic);
1121 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1122}
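// Illustrative sketch (not part of the upstream source):
//   size_t n = __builtin_dynamic_object_size(p, 0);
// with no pass_object_size parameter and no counted_by information lowers to
// roughly
//   %n = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 true, i1 true)
// where the i1 arguments are (Min, NullIsUnknown, Dynamic) as computed above.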
1123
1124namespace {
1125/// A struct to generically describe a bit test intrinsic.
1126struct BitTest {
1127 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1128 enum InterlockingKind : uint8_t {
1129 Unlocked,
1130 Sequential,
1131 Acquire,
1132 Release,
1133 NoFence
1134 };
1135
1136 ActionKind Action;
1137 InterlockingKind Interlocking;
1138 bool Is64Bit;
1139
1140 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1141};
1142
1143// Returns the first convergence entry/loop/anchor instruction found in |BB|.
1144// Returns nullptr otherwise.
1145llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
1146 for (auto &I : *BB) {
1147 auto *II = dyn_cast<llvm::IntrinsicInst>(&I);
1148 if (II && isConvergenceControlIntrinsic(II->getIntrinsicID()))
1149 return II;
1150 }
1151 return nullptr;
1152}
1153
1154} // namespace
1155
1156llvm::CallBase *
1157CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input,
1158 llvm::Value *ParentToken) {
1159 llvm::Value *bundleArgs[] = {ParentToken};
1160 llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
1161 auto Output = llvm::CallBase::addOperandBundle(
1162 Input, llvm::LLVMContext::OB_convergencectrl, OB, Input);
1163 Input->replaceAllUsesWith(Output);
1164 Input->eraseFromParent();
1165 return Output;
1166}
1167
1168llvm::IntrinsicInst *
1169CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
1170 llvm::Value *ParentToken) {
1171 CGBuilderTy::InsertPoint IP = Builder.saveIP();
1172 Builder.SetInsertPoint(&BB->front());
1173 auto CB = Builder.CreateIntrinsic(
1174 llvm::Intrinsic::experimental_convergence_loop, {}, {});
1175 Builder.restoreIP(IP);
1176
1177 auto I = addConvergenceControlToken(CB, ParentToken);
1178 return cast<llvm::IntrinsicInst>(I);
1179}
1180
1181llvm::IntrinsicInst *
1182CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
1183 auto *BB = &F->getEntryBlock();
1184 auto *token = getConvergenceToken(BB);
1185 if (token)
1186 return token;
1187
1188 // Adding a convergence token requires the function to be marked as
1189 // convergent.
1190 F->setConvergent();
1191
1192 CGBuilderTy::InsertPoint IP = Builder.saveIP();
1193 Builder.SetInsertPoint(&BB->front());
1194 auto I = Builder.CreateIntrinsic(
1195 llvm::Intrinsic::experimental_convergence_entry, {}, {});
1196 assert(isa<llvm::IntrinsicInst>(I));
1197 Builder.restoreIP(IP);
1198
1199 return cast<llvm::IntrinsicInst>(I);
1200}
1201
1202llvm::IntrinsicInst *
1203CodeGenFunction::getOrEmitConvergenceLoopToken(const LoopInfo *LI) {
1204 assert(LI != nullptr);
1205
1206 auto *token = getConvergenceToken(LI->getHeader());
1207 if (token)
1208 return token;
1209
1210 llvm::IntrinsicInst *PII =
1211 LI->getParent()
1212 ? emitConvergenceLoopToken(
1213 LI->getHeader(), getOrEmitConvergenceLoopToken(LI->getParent()))
1214 : getOrEmitConvergenceEntryToken(LI->getHeader()->getParent());
1215
1216 return emitConvergenceLoopToken(LI->getHeader(), PII);
1217}
1218
1219llvm::CallBase *
1220CodeGenFunction::addControlledConvergenceToken(llvm::CallBase *Input) {
1221 llvm::Value *ParentToken =
1222 LoopStack.hasInfo()
1223 ? getOrEmitConvergenceLoopToken(&LoopStack.getInfo())
1224 : getOrEmitConvergenceEntryToken(Input->getFunction());
1225 return addConvergenceControlToken(Input, ParentToken);
1226}
1227
1228BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1229 switch (BuiltinID) {
1230 // Main portable variants.
1231 case Builtin::BI_bittest:
1232 return {TestOnly, Unlocked, false};
1233 case Builtin::BI_bittestandcomplement:
1234 return {Complement, Unlocked, false};
1235 case Builtin::BI_bittestandreset:
1236 return {Reset, Unlocked, false};
1237 case Builtin::BI_bittestandset:
1238 return {Set, Unlocked, false};
1239 case Builtin::BI_interlockedbittestandreset:
1240 return {Reset, Sequential, false};
1241 case Builtin::BI_interlockedbittestandset:
1242 return {Set, Sequential, false};
1243
1244 // X86-specific 64-bit variants.
1245 case Builtin::BI_bittest64:
1246 return {TestOnly, Unlocked, true};
1247 case Builtin::BI_bittestandcomplement64:
1248 return {Complement, Unlocked, true};
1249 case Builtin::BI_bittestandreset64:
1250 return {Reset, Unlocked, true};
1251 case Builtin::BI_bittestandset64:
1252 return {Set, Unlocked, true};
1253 case Builtin::BI_interlockedbittestandreset64:
1254 return {Reset, Sequential, true};
1255 case Builtin::BI_interlockedbittestandset64:
1256 return {Set, Sequential, true};
1257
1258 // ARM/AArch64-specific ordering variants.
1259 case Builtin::BI_interlockedbittestandset_acq:
1260 return {Set, Acquire, false};
1261 case Builtin::BI_interlockedbittestandset_rel:
1262 return {Set, Release, false};
1263 case Builtin::BI_interlockedbittestandset_nf:
1264 return {Set, NoFence, false};
1265 case Builtin::BI_interlockedbittestandreset_acq:
1266 return {Reset, Acquire, false};
1267 case Builtin::BI_interlockedbittestandreset_rel:
1268 return {Reset, Release, false};
1269 case Builtin::BI_interlockedbittestandreset_nf:
1270 return {Reset, NoFence, false};
1271 }
1272 llvm_unreachable("expected only bittest intrinsics");
1273}
1274
1275static char bitActionToX86BTCode(BitTest::ActionKind A) {
1276 switch (A) {
1277 case BitTest::TestOnly: return '\0';
1278 case BitTest::Complement: return 'c';
1279 case BitTest::Reset: return 'r';
1280 case BitTest::Set: return 's';
1281 }
1282 llvm_unreachable("invalid action");
1283}
1284
1285static Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1286 BitTest BT,
1287 const CallExpr *E, Value *BitBase,
1288 Value *BitPos) {
1289 char Action = bitActionToX86BTCode(BT.Action);
1290 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1291
1292 // Build the assembly.
1293 SmallString<64> Asm;
1294 raw_svector_ostream AsmOS(Asm);
1295 if (BT.Interlocking != BitTest::Unlocked)
1296 AsmOS << "lock ";
1297 AsmOS << "bt";
1298 if (Action)
1299 AsmOS << Action;
1300 AsmOS << SizeSuffix << " $2, ($1)";
1301
1302 // Build the constraints. FIXME: We should support immediates when possible.
1303 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1304 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1305 if (!MachineClobbers.empty()) {
1306 Constraints += ',';
1307 Constraints += MachineClobbers;
1308 }
1309 llvm::IntegerType *IntType = llvm::IntegerType::get(
1310 CGF.getLLVMContext(),
1311 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1312 llvm::FunctionType *FTy =
1313 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1314
1315 llvm::InlineAsm *IA =
1316 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1317 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1318}
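// Illustrative sketch (not part of the upstream source): for
//   unsigned char r = _interlockedbittestandset(p, 10);
// EmitX86BitTestIntrinsic builds the inline asm
//   "lock btsl $2, ($1)"
// with constraints "={@ccc},r,r,~{cc},~{memory}", so the bit's previous value
// is returned through the carry flag as an i8.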
1319
1320static llvm::AtomicOrdering
1321getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1322 switch (I) {
1323 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1324 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1325 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1326 case BitTest::Release: return llvm::AtomicOrdering::Release;
1327 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1328 }
1329 llvm_unreachable("invalid interlocking");
1330}
1331
1332/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1333/// bits and a bit position and read and optionally modify the bit at that
1334/// position. The position index can be arbitrarily large, i.e. it can be larger
1335/// than 31 or 63, so we need an indexed load in the general case.
1336static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1337 unsigned BuiltinID,
1338 const CallExpr *E) {
1339 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1340 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1341
1342 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1343
1344 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1345 // indexing operation internally. Use them if possible.
1346 if (CGF.getTarget().getTriple().isX86())
1347 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1348
1349 // Otherwise, use generic code to load one byte and test the bit. Use all but
1350 // the bottom three bits as the array index, and the bottom three bits to form
1351 // a mask.
1352 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1353 Value *ByteIndex = CGF.Builder.CreateAShr(
1354 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1355 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1356 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1357 ByteIndex, "bittest.byteaddr"),
1358 CGF.Int8Ty, CharUnits::One());
1359 Value *PosLow =
1360 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1361 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1362
1363 // The updating instructions will need a mask.
1364 Value *Mask = nullptr;
1365 if (BT.Action != BitTest::TestOnly) {
1366 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1367 "bittest.mask");
1368 }
1369
1370 // Check the action and ordering of the interlocked intrinsics.
1371 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1372
1373 Value *OldByte = nullptr;
1374 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1375 // Emit a combined atomicrmw load/store operation for the interlocked
1376 // intrinsics.
1377 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1378 if (BT.Action == BitTest::Reset) {
1379 Mask = CGF.Builder.CreateNot(Mask);
1380 RMWOp = llvm::AtomicRMWInst::And;
1381 }
1382 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1383 } else {
1384 // Emit a plain load for the non-interlocked intrinsics.
1385 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1386 Value *NewByte = nullptr;
1387 switch (BT.Action) {
1388 case BitTest::TestOnly:
1389 // Don't store anything.
1390 break;
1391 case BitTest::Complement:
1392 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1393 break;
1394 case BitTest::Reset:
1395 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1396 break;
1397 case BitTest::Set:
1398 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1399 break;
1400 }
1401 if (NewByte)
1402 CGF.Builder.CreateStore(NewByte, ByteAddr);
1403 }
1404
1405 // However we loaded the old byte, either by plain load or atomicrmw, shift
1406 // the bit into the low position and mask it to 0 or 1.
1407 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1408 return CGF.Builder.CreateAnd(
1409 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1410}
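// Illustrative sketch of the generic (non-x86) path above (not part of the
// upstream source): _bittestandset(p, n) becomes roughly
//   %byteidx  = ashr i32 %n, 3
//   %byteaddr = getelementptr inbounds i8, ptr %p, i32 %byteidx
//   %n8       = trunc i32 %n to i8
//   %poslow   = and i8 %n8, 7
//   %old      = load i8, ptr %byteaddr
//   %mask     = shl i8 1, %poslow
//   %new      = or i8 %old, %mask
//   store i8 %new, ptr %byteaddr
//   %shr      = lshr i8 %old, %poslow
//   %res      = and i8 %shr, 1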
1411
1412static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1413 unsigned BuiltinID,
1414 const CallExpr *E) {
1415 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1416
1417 SmallString<64> Asm;
1418 raw_svector_ostream AsmOS(Asm);
1419 llvm::IntegerType *RetType = CGF.Int32Ty;
1420
1421 switch (BuiltinID) {
1422 case clang::PPC::BI__builtin_ppc_ldarx:
1423 AsmOS << "ldarx ";
1424 RetType = CGF.Int64Ty;
1425 break;
1426 case clang::PPC::BI__builtin_ppc_lwarx:
1427 AsmOS << "lwarx ";
1428 RetType = CGF.Int32Ty;
1429 break;
1430 case clang::PPC::BI__builtin_ppc_lharx:
1431 AsmOS << "lharx ";
1432 RetType = CGF.Int16Ty;
1433 break;
1434 case clang::PPC::BI__builtin_ppc_lbarx:
1435 AsmOS << "lbarx ";
1436 RetType = CGF.Int8Ty;
1437 break;
1438 default:
1439 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1440 }
1441
1442 AsmOS << "$0, ${1:y}";
1443
1444 std::string Constraints = "=r,*Z,~{memory}";
1445 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1446 if (!MachineClobbers.empty()) {
1447 Constraints += ',';
1448 Constraints += MachineClobbers;
1449 }
1450
1451 llvm::Type *PtrType = CGF.UnqualPtrTy;
1452 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1453
1454 llvm::InlineAsm *IA =
1455 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1456 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1457 CI->addParamAttr(
1458 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1459 return CI;
1460}
1461
1462namespace {
1463enum class MSVCSetJmpKind {
1464 _setjmpex,
1465 _setjmp3,
1466 _setjmp
1467};
1468}
1469
1470/// MSVC handles setjmp a bit differently on different platforms. On every
1471/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1472/// parameters can be passed as variadic arguments, but we always pass none.
1473static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1474 const CallExpr *E) {
1475 llvm::Value *Arg1 = nullptr;
1476 llvm::Type *Arg1Ty = nullptr;
1477 StringRef Name;
1478 bool IsVarArg = false;
1479 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1480 Name = "_setjmp3";
1481 Arg1Ty = CGF.Int32Ty;
1482 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1483 IsVarArg = true;
1484 } else {
1485 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1486 Arg1Ty = CGF.Int8PtrTy;
1487 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1488 Arg1 = CGF.Builder.CreateCall(
1489 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1490 } else
1491 Arg1 = CGF.Builder.CreateCall(
1492 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1493 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1494 }
1495
1496 // Mark the call site and declaration with ReturnsTwice.
1497 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1498 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1499 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1500 llvm::Attribute::ReturnsTwice);
1501 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1502 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1503 ReturnsTwiceAttr, /*Local=*/true);
1504
1505 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1506 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1507 llvm::Value *Args[] = {Buf, Arg1};
1508 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1509 CB->setAttributes(ReturnsTwiceAttr);
1510 return RValue::get(CB);
1511}
1512
1513// Many MSVC builtins are shared by x64, ARM, and AArch64; to avoid repeating code,
1514// we handle them here.
1515enum class CodeGenFunction::MSVCIntrin {
1554 __fastfail,
1555};
1556
1557static std::optional<CodeGenFunction::MSVCIntrin>
1558translateArmToMsvcIntrin(unsigned BuiltinID) {
1559 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1560 switch (BuiltinID) {
1561 default:
1562 return std::nullopt;
1563 case clang::ARM::BI_BitScanForward:
1564 case clang::ARM::BI_BitScanForward64:
1565 return MSVCIntrin::_BitScanForward;
1566 case clang::ARM::BI_BitScanReverse:
1567 case clang::ARM::BI_BitScanReverse64:
1568 return MSVCIntrin::_BitScanReverse;
1569 case clang::ARM::BI_InterlockedAnd64:
1570 return MSVCIntrin::_InterlockedAnd;
1571 case clang::ARM::BI_InterlockedExchange64:
1572 return MSVCIntrin::_InterlockedExchange;
1573 case clang::ARM::BI_InterlockedExchangeAdd64:
1574 return MSVCIntrin::_InterlockedExchangeAdd;
1575 case clang::ARM::BI_InterlockedExchangeSub64:
1576 return MSVCIntrin::_InterlockedExchangeSub;
1577 case clang::ARM::BI_InterlockedOr64:
1578 return MSVCIntrin::_InterlockedOr;
1579 case clang::ARM::BI_InterlockedXor64:
1580 return MSVCIntrin::_InterlockedXor;
1581 case clang::ARM::BI_InterlockedDecrement64:
1582 return MSVCIntrin::_InterlockedDecrement;
1583 case clang::ARM::BI_InterlockedIncrement64:
1584 return MSVCIntrin::_InterlockedIncrement;
1585 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1586 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1587 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1588 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1589 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1590 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1591 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1592 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1593 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1594 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1595 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1596 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1597 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1598 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1599 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1600 case clang::ARM::BI_InterlockedExchange8_acq:
1601 case clang::ARM::BI_InterlockedExchange16_acq:
1602 case clang::ARM::BI_InterlockedExchange_acq:
1603 case clang::ARM::BI_InterlockedExchange64_acq:
1604 return MSVCIntrin::_InterlockedExchange_acq;
1605 case clang::ARM::BI_InterlockedExchange8_rel:
1606 case clang::ARM::BI_InterlockedExchange16_rel:
1607 case clang::ARM::BI_InterlockedExchange_rel:
1608 case clang::ARM::BI_InterlockedExchange64_rel:
1609 return MSVCIntrin::_InterlockedExchange_rel;
1610 case clang::ARM::BI_InterlockedExchange8_nf:
1611 case clang::ARM::BI_InterlockedExchange16_nf:
1612 case clang::ARM::BI_InterlockedExchange_nf:
1613 case clang::ARM::BI_InterlockedExchange64_nf:
1614 return MSVCIntrin::_InterlockedExchange_nf;
1615 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1616 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1617 case clang::ARM::BI_InterlockedCompareExchange_acq:
1618 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1619 return MSVCIntrin::_InterlockedCompareExchange_acq;
1620 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1621 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1622 case clang::ARM::BI_InterlockedCompareExchange_rel:
1623 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1624 return MSVCIntrin::_InterlockedCompareExchange_rel;
1625 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1626 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1627 case clang::ARM::BI_InterlockedCompareExchange_nf:
1628 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1629 return MSVCIntrin::_InterlockedCompareExchange_nf;
1630 case clang::ARM::BI_InterlockedOr8_acq:
1631 case clang::ARM::BI_InterlockedOr16_acq:
1632 case clang::ARM::BI_InterlockedOr_acq:
1633 case clang::ARM::BI_InterlockedOr64_acq:
1634 return MSVCIntrin::_InterlockedOr_acq;
1635 case clang::ARM::BI_InterlockedOr8_rel:
1636 case clang::ARM::BI_InterlockedOr16_rel:
1637 case clang::ARM::BI_InterlockedOr_rel:
1638 case clang::ARM::BI_InterlockedOr64_rel:
1639 return MSVCIntrin::_InterlockedOr_rel;
1640 case clang::ARM::BI_InterlockedOr8_nf:
1641 case clang::ARM::BI_InterlockedOr16_nf:
1642 case clang::ARM::BI_InterlockedOr_nf:
1643 case clang::ARM::BI_InterlockedOr64_nf:
1644 return MSVCIntrin::_InterlockedOr_nf;
1645 case clang::ARM::BI_InterlockedXor8_acq:
1646 case clang::ARM::BI_InterlockedXor16_acq:
1647 case clang::ARM::BI_InterlockedXor_acq:
1648 case clang::ARM::BI_InterlockedXor64_acq:
1649 return MSVCIntrin::_InterlockedXor_acq;
1650 case clang::ARM::BI_InterlockedXor8_rel:
1651 case clang::ARM::BI_InterlockedXor16_rel:
1652 case clang::ARM::BI_InterlockedXor_rel:
1653 case clang::ARM::BI_InterlockedXor64_rel:
1654 return MSVCIntrin::_InterlockedXor_rel;
1655 case clang::ARM::BI_InterlockedXor8_nf:
1656 case clang::ARM::BI_InterlockedXor16_nf:
1657 case clang::ARM::BI_InterlockedXor_nf:
1658 case clang::ARM::BI_InterlockedXor64_nf:
1659 return MSVCIntrin::_InterlockedXor_nf;
1660 case clang::ARM::BI_InterlockedAnd8_acq:
1661 case clang::ARM::BI_InterlockedAnd16_acq:
1662 case clang::ARM::BI_InterlockedAnd_acq:
1663 case clang::ARM::BI_InterlockedAnd64_acq:
1664 return MSVCIntrin::_InterlockedAnd_acq;
1665 case clang::ARM::BI_InterlockedAnd8_rel:
1666 case clang::ARM::BI_InterlockedAnd16_rel:
1667 case clang::ARM::BI_InterlockedAnd_rel:
1668 case clang::ARM::BI_InterlockedAnd64_rel:
1669 return MSVCIntrin::_InterlockedAnd_rel;
1670 case clang::ARM::BI_InterlockedAnd8_nf:
1671 case clang::ARM::BI_InterlockedAnd16_nf:
1672 case clang::ARM::BI_InterlockedAnd_nf:
1673 case clang::ARM::BI_InterlockedAnd64_nf:
1674 return MSVCIntrin::_InterlockedAnd_nf;
1675 case clang::ARM::BI_InterlockedIncrement16_acq:
1676 case clang::ARM::BI_InterlockedIncrement_acq:
1677 case clang::ARM::BI_InterlockedIncrement64_acq:
1678 return MSVCIntrin::_InterlockedIncrement_acq;
1679 case clang::ARM::BI_InterlockedIncrement16_rel:
1680 case clang::ARM::BI_InterlockedIncrement_rel:
1681 case clang::ARM::BI_InterlockedIncrement64_rel:
1682 return MSVCIntrin::_InterlockedIncrement_rel;
1683 case clang::ARM::BI_InterlockedIncrement16_nf:
1684 case clang::ARM::BI_InterlockedIncrement_nf:
1685 case clang::ARM::BI_InterlockedIncrement64_nf:
1686 return MSVCIntrin::_InterlockedIncrement_nf;
1687 case clang::ARM::BI_InterlockedDecrement16_acq:
1688 case clang::ARM::BI_InterlockedDecrement_acq:
1689 case clang::ARM::BI_InterlockedDecrement64_acq:
1690 return MSVCIntrin::_InterlockedDecrement_acq;
1691 case clang::ARM::BI_InterlockedDecrement16_rel:
1692 case clang::ARM::BI_InterlockedDecrement_rel:
1693 case clang::ARM::BI_InterlockedDecrement64_rel:
1694 return MSVCIntrin::_InterlockedDecrement_rel;
1695 case clang::ARM::BI_InterlockedDecrement16_nf:
1696 case clang::ARM::BI_InterlockedDecrement_nf:
1697 case clang::ARM::BI_InterlockedDecrement64_nf:
1698 return MSVCIntrin::_InterlockedDecrement_nf;
1699 }
1700 llvm_unreachable("must return from switch");
1701}
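// Note that all size-suffixed variants of a given builtin (for example
// _InterlockedOr8_acq, _InterlockedOr16_acq, _InterlockedOr_acq, and
// _InterlockedOr64_acq) collapse onto a single MSVCIntrin value
// (_InterlockedOr_acq here); the emitter below can recover the operation
// width from the call expression's argument types, so only the operation and
// the memory ordering need to be encoded in the enum.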
1702
1703static std::optional<CodeGenFunction::MSVCIntrin>
1704translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1705 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1706 switch (BuiltinID) {
1707 default:
1708 return std::nullopt;
1709 case clang::AArch64::BI_BitScanForward:
1710 case clang::AArch64::BI_BitScanForward64:
1711 return MSVCIntrin::_BitScanForward;
1712 case clang::AArch64::BI_BitScanReverse:
1713 case clang::AArch64::BI_BitScanReverse64:
1714 return MSVCIntrin::_BitScanReverse;
1715 case clang::AArch64::BI_InterlockedAnd64:
1716 return MSVCIntrin::_InterlockedAnd;
1717 case clang::AArch64::BI_InterlockedExchange64:
1718 return MSVCIntrin::_InterlockedExchange;
1719 case clang::AArch64::BI_InterlockedExchangeAdd64:
1720 return MSVCIntrin::_InterlockedExchangeAdd;
1721 case clang::AArch64::BI_InterlockedExchangeSub64:
1722 return MSVCIntrin::_InterlockedExchangeSub;
1723 case clang::AArch64::BI_InterlockedOr64:
1724 return MSVCIntrin::_InterlockedOr;
1725 case clang::AArch64::BI_InterlockedXor64:
1726 return MSVCIntrin::_InterlockedXor;
1727 case clang::AArch64::BI_InterlockedDecrement64:
1728 return MSVCIntrin::_InterlockedDecrement;
1729 case clang::AArch64::BI_InterlockedIncrement64:
1730 return MSVCIntrin::_InterlockedIncrement;
1731 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1732 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1733 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1734 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1735 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1736 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1737 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1738 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1739 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1740 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1741 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1742 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1743 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1744 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1745 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1746 case clang::AArch64::BI_InterlockedExchange8_acq:
1747 case clang::AArch64::BI_InterlockedExchange16_acq:
1748 case clang::AArch64::BI_InterlockedExchange_acq:
1749 case clang::AArch64::BI_InterlockedExchange64_acq:
1750 return MSVCIntrin::_InterlockedExchange_acq;
1751 case clang::AArch64::BI_InterlockedExchange8_rel:
1752 case clang::AArch64::BI_InterlockedExchange16_rel:
1753 case clang::AArch64::BI_InterlockedExchange_rel:
1754 case clang::AArch64::BI_InterlockedExchange64_rel:
1755 return MSVCIntrin::_InterlockedExchange_rel;
1756 case clang::AArch64::BI_InterlockedExchange8_nf:
1757 case clang::AArch64::BI_InterlockedExchange16_nf:
1758 case clang::AArch64::BI_InterlockedExchange_nf:
1759 case clang::AArch64::BI_InterlockedExchange64_nf:
1760 return MSVCIntrin::_InterlockedExchange_nf;
1761 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1762 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1763 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1764 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1765 return MSVCIntrin::_InterlockedCompareExchange_acq;
1766 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1767 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1768 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1769 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1770 return MSVCIntrin::_InterlockedCompareExchange_rel;
1771 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1772 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1773 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1774 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1775 return MSVCIntrin::_InterlockedCompareExchange_nf;
1776 case clang::AArch64::BI_InterlockedCompareExchange128:
1777 return MSVCIntrin::_InterlockedCompareExchange128;
1778 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1779 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1780 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1781 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1782 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1783 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1784 case clang::AArch64::BI_InterlockedOr8_acq:
1785 case clang::AArch64::BI_InterlockedOr16_acq:
1786 case clang::AArch64::BI_InterlockedOr_acq:
1787 case clang::AArch64::BI_InterlockedOr64_acq:
1788 return MSVCIntrin::_InterlockedOr_acq;
1789 case clang::AArch64::BI_InterlockedOr8_rel:
1790 case clang::AArch64::BI_InterlockedOr16_rel:
1791 case clang::AArch64::BI_InterlockedOr_rel:
1792 case clang::AArch64::BI_InterlockedOr64_rel:
1793 return MSVCIntrin::_InterlockedOr_rel;
1794 case clang::AArch64::BI_InterlockedOr8_nf:
1795 case clang::AArch64::BI_InterlockedOr16_nf:
1796 case clang::AArch64::BI_InterlockedOr_nf:
1797 case clang::AArch64::BI_InterlockedOr64_nf:
1798 return MSVCIntrin::_InterlockedOr_nf;
1799 case clang::AArch64::BI_InterlockedXor8_acq:
1800 case clang::AArch64::BI_InterlockedXor16_acq:
1801 case clang::AArch64::BI_InterlockedXor_acq:
1802 case clang::AArch64::BI_InterlockedXor64_acq:
1803 return MSVCIntrin::_InterlockedXor_acq;
1804 case clang::AArch64::BI_InterlockedXor8_rel:
1805 case clang::AArch64::BI_InterlockedXor16_rel:
1806 case clang::AArch64::BI_InterlockedXor_rel:
1807 case clang::AArch64::BI_InterlockedXor64_rel:
1808 return MSVCIntrin::_InterlockedXor_rel;
1809 case clang::AArch64::BI_InterlockedXor8_nf:
1810 case clang::AArch64::BI_InterlockedXor16_nf:
1811 case clang::AArch64::BI_InterlockedXor_nf:
1812 case clang::AArch64::BI_InterlockedXor64_nf:
1813 return MSVCIntrin::_InterlockedXor_nf;
1814 case clang::AArch64::BI_InterlockedAnd8_acq:
1815 case clang::AArch64::BI_InterlockedAnd16_acq:
1816 case clang::AArch64::BI_InterlockedAnd_acq:
1817 case clang::AArch64::BI_InterlockedAnd64_acq:
1818 return MSVCIntrin::_InterlockedAnd_acq;
1819 case clang::AArch64::BI_InterlockedAnd8_rel:
1820 case clang::AArch64::BI_InterlockedAnd16_rel:
1821 case clang::AArch64::BI_InterlockedAnd_rel:
1822 case clang::AArch64::BI_InterlockedAnd64_rel:
1823 return MSVCIntrin::_InterlockedAnd_rel;
1824 case clang::AArch64::BI_InterlockedAnd8_nf:
1825 case clang::AArch64::BI_InterlockedAnd16_nf:
1826 case clang::AArch64::BI_InterlockedAnd_nf:
1827 case clang::AArch64::BI_InterlockedAnd64_nf:
1828 return MSVCIntrin::_InterlockedAnd_nf;
1829 case clang::AArch64::BI_InterlockedIncrement16_acq:
1830 case clang::AArch64::BI_InterlockedIncrement_acq:
1831 case clang::AArch64::BI_InterlockedIncrement64_acq:
1832 return MSVCIntrin::_InterlockedIncrement_acq;
1833 case clang::AArch64::BI_InterlockedIncrement16_rel:
1834 case clang::AArch64::BI_InterlockedIncrement_rel:
1835 case clang::AArch64::BI_InterlockedIncrement64_rel:
1836 return MSVCIntrin::_InterlockedIncrement_rel;
1837 case clang::AArch64::BI_InterlockedIncrement16_nf:
1838 case clang::AArch64::BI_InterlockedIncrement_nf:
1839 case clang::AArch64::BI_InterlockedIncrement64_nf:
1840 return MSVCIntrin::_InterlockedIncrement_nf;
1841 case clang::AArch64::BI_InterlockedDecrement16_acq:
1842 case clang::AArch64::BI_InterlockedDecrement_acq:
1843 case clang::AArch64::BI_InterlockedDecrement64_acq:
1844 return MSVCIntrin::_InterlockedDecrement_acq;
1845 case clang::AArch64::BI_InterlockedDecrement16_rel:
1846 case clang::AArch64::BI_InterlockedDecrement_rel:
1847 case clang::AArch64::BI_InterlockedDecrement64_rel:
1848 return MSVCIntrin::_InterlockedDecrement_rel;
1849 case clang::AArch64::BI_InterlockedDecrement16_nf:
1850 case clang::AArch64::BI_InterlockedDecrement_nf:
1851 case clang::AArch64::BI_InterlockedDecrement64_nf:
1852 return MSVCIntrin::_InterlockedDecrement_nf;
1853 }
1854 llvm_unreachable("must return from switch");
1855}
1856
1857static std::optional<CodeGenFunction::MSVCIntrin>
1858translateX86ToMsvcIntrin(unsigned BuiltinID) {
1859 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1860 switch (BuiltinID) {
1861 default:
1862 return std::nullopt;
1863 case clang::X86::BI_BitScanForward:
1864 case clang::X86::BI_BitScanForward64:
1865 return MSVCIntrin::_BitScanForward;
1866 case clang::X86::BI_BitScanReverse:
1867 case clang::X86::BI_BitScanReverse64:
1868 return MSVCIntrin::_BitScanReverse;
1869 case clang::X86::BI_InterlockedAnd64:
1870 return MSVCIntrin::_InterlockedAnd;
1871 case clang::X86::BI_InterlockedCompareExchange128:
1872 return MSVCIntrin::_InterlockedCompareExchange128;
1873 case clang::X86::BI_InterlockedExchange64:
1874 return MSVCIntrin::_InterlockedExchange;
1875 case clang::X86::BI_InterlockedExchangeAdd64:
1876 return MSVCIntrin::_InterlockedExchangeAdd;
1877 case clang::X86::BI_InterlockedExchangeSub64:
1878 return MSVCIntrin::_InterlockedExchangeSub;
1879 case clang::X86::BI_InterlockedOr64:
1880 return MSVCIntrin::_InterlockedOr;
1881 case clang::X86::BI_InterlockedXor64:
1882 return MSVCIntrin::_InterlockedXor;
1883 case clang::X86::BI_InterlockedDecrement64:
1884 return MSVCIntrin::_InterlockedDecrement;
1885 case clang::X86::BI_InterlockedIncrement64:
1886 return MSVCIntrin::_InterlockedIncrement;
1887 }
1888 llvm_unreachable("must return from switch");
1889}
1890
1891// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1892Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1893 const CallExpr *E) {
1894 switch (BuiltinID) {
1895 case MSVCIntrin::_BitScanForward:
1896 case MSVCIntrin::_BitScanReverse: {
1897 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1898 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1899
1900 llvm::Type *ArgType = ArgValue->getType();
1901 llvm::Type *IndexType = IndexAddress.getElementType();
1902 llvm::Type *ResultType = ConvertType(E->getType());
1903
1904 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1905 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1906 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1907
1908 BasicBlock *Begin = Builder.GetInsertBlock();
1909 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1910 Builder.SetInsertPoint(End);
1911 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1912
1913 Builder.SetInsertPoint(Begin);
1914 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1915 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1916 Builder.CreateCondBr(IsZero, End, NotZero);
1917 Result->addIncoming(ResZero, Begin);
1918
1919 Builder.SetInsertPoint(NotZero);
1920
1921 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1922 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1923 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1924 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1925 Builder.CreateStore(ZeroCount, IndexAddress, false);
1926 } else {
1927 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1928 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1929
1930 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1931 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1932 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1933 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1934 Builder.CreateStore(Index, IndexAddress, false);
1935 }
1936 Builder.CreateBr(End);
1937 Result->addIncoming(ResOne, NotZero);
1938
1939 Builder.SetInsertPoint(End);
1940 return Result;
1941 }
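  // Sketch of the control flow emitted above for _BitScanForward/_BitScanReverse:
  //
  //   begin:            %iszero = icmp eq %arg, 0
  //                     br %iszero, label %bitscan_end, label %bitscan_not_zero
  //   bitscan_not_zero: %zcnt = cttz/ctlz(%arg, /*is_zero_poison=*/true)
  //                     store the bit index (for ctlz: BitWidth-1 - %zcnt)
  //                     br label %bitscan_end
  //   bitscan_end:      %bitscan_result = phi [0, %begin], [1, %bitscan_not_zero]
  //
  // The zero input is branched around, which is why it is safe to request the
  // poison-on-zero form of the count-zeros intrinsics.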
1942 case MSVCIntrin::_InterlockedAnd:
1943 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1944 case MSVCIntrin::_InterlockedExchange:
1945 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1946 case MSVCIntrin::_InterlockedExchangeAdd:
1947 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1948 case MSVCIntrin::_InterlockedExchangeSub:
1949 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1950 case MSVCIntrin::_InterlockedOr:
1951 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1952 case MSVCIntrin::_InterlockedXor:
1953 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1954 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1955 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1956 AtomicOrdering::Acquire);
1957 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1958 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1959 AtomicOrdering::Release);
1960 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1961 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1962 AtomicOrdering::Monotonic);
1963 case MSVCIntrin::_InterlockedExchange_acq:
1964 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1965 AtomicOrdering::Acquire);
1966 case MSVCIntrin::_InterlockedExchange_rel:
1967 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1968 AtomicOrdering::Release);
1969 case MSVCIntrin::_InterlockedExchange_nf:
1970 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1971 AtomicOrdering::Monotonic);
1972 case MSVCIntrin::_InterlockedCompareExchange_acq:
1973 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1974 case MSVCIntrin::_InterlockedCompareExchange_rel:
1975 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1976 case MSVCIntrin::_InterlockedCompareExchange_nf:
1977 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1978 case MSVCIntrin::_InterlockedCompareExchange128:
1979 return EmitAtomicCmpXchg128ForMSIntrin(
1980 *this, E, AtomicOrdering::SequentiallyConsistent);
1981 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1982 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1983 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1984 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1985 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1986 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1987 case MSVCIntrin::_InterlockedOr_acq:
1988 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1989 AtomicOrdering::Acquire);
1990 case MSVCIntrin::_InterlockedOr_rel:
1991 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1992 AtomicOrdering::Release);
1993 case MSVCIntrin::_InterlockedOr_nf:
1994 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1995 AtomicOrdering::Monotonic);
1996 case MSVCIntrin::_InterlockedXor_acq:
1997 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1998 AtomicOrdering::Acquire);
1999 case MSVCIntrin::_InterlockedXor_rel:
2000 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2001 AtomicOrdering::Release);
2002 case MSVCIntrin::_InterlockedXor_nf:
2003 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2004 AtomicOrdering::Monotonic);
2005 case MSVCIntrin::_InterlockedAnd_acq:
2006 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2007 AtomicOrdering::Acquire);
2008 case MSVCIntrin::_InterlockedAnd_rel:
2009 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2010 AtomicOrdering::Release);
2011 case MSVCIntrin::_InterlockedAnd_nf:
2012 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2013 AtomicOrdering::Monotonic);
2014 case MSVCIntrin::_InterlockedIncrement_acq:
2015 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2016 case MSVCIntrin::_InterlockedIncrement_rel:
2017 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2018 case MSVCIntrin::_InterlockedIncrement_nf:
2019 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2020 case MSVCIntrin::_InterlockedDecrement_acq:
2021 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2022 case MSVCIntrin::_InterlockedDecrement_rel:
2023 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2024 case MSVCIntrin::_InterlockedDecrement_nf:
2025 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2026
2027 case MSVCIntrin::_InterlockedDecrement:
2028 return EmitAtomicDecrementValue(*this, E);
2029 case MSVCIntrin::_InterlockedIncrement:
2030 return EmitAtomicIncrementValue(*this, E);
2031
2032 case MSVCIntrin::__fastfail: {
2033 // Request immediate process termination from the kernel. The instruction
2034 // sequences to do this are documented on MSDN:
2035 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2036 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2037 StringRef Asm, Constraints;
2038 switch (ISA) {
2039 default:
2040 ErrorUnsupported(E, "__fastfail call for this architecture");
2041 break;
2042 case llvm::Triple::x86:
2043 case llvm::Triple::x86_64:
2044 Asm = "int $$0x29";
2045 Constraints = "{cx}";
2046 break;
2047 case llvm::Triple::thumb:
2048 Asm = "udf #251";
2049 Constraints = "{r0}";
2050 break;
2051 case llvm::Triple::aarch64:
2052 Asm = "brk #0xF003";
2053 Constraints = "{w0}";
2054 }
2055 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2056 llvm::InlineAsm *IA =
2057 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2058 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2059 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2060 llvm::Attribute::NoReturn);
2061 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2062 CI->setAttributes(NoReturnAttr);
2063 return CI;
2064 }
2065 }
2066 llvm_unreachable("Incorrect MSVC intrinsic!");
2067}
2068
2069namespace {
2070// ARC cleanup for __builtin_os_log_format
2071struct CallObjCArcUse final : EHScopeStack::Cleanup {
2072 CallObjCArcUse(llvm::Value *object) : object(object) {}
2073 llvm::Value *object;
2074
2075 void Emit(CodeGenFunction &CGF, Flags flags) override {
2076 CGF.EmitARCIntrinsicUse(object);
2077 }
2078};
2079}
2080
2081Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2082 BuiltinCheckKind Kind) {
2083 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2084 && "Unsupported builtin check kind");
2085
2086 Value *ArgValue = EmitScalarExpr(E);
2087 if (!SanOpts.has(SanitizerKind::Builtin))
2088 return ArgValue;
2089
2090 SanitizerScope SanScope(this);
2091 Value *Cond = Builder.CreateICmpNE(
2092 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2093 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2094 SanitizerHandler::InvalidBuiltin,
2095 {EmitCheckSourceLocation(E->getExprLoc()),
2096 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2097 std::nullopt);
2098 return ArgValue;
2099}
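// For example, with -fsanitize=builtin a call such as __builtin_clz(x) routes
// its argument through the helper above: when x == 0 (undefined for the
// clz/ctz builtins) the emitted check reaches the invalid-builtin sanitizer
// handler; otherwise, and whenever the sanitizer is off, the argument is
// simply returned unchanged.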
2100
2101static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2102 return CGF.Builder.CreateBinaryIntrinsic(
2103 Intrinsic::abs, ArgValue,
2104 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2105}
2106
2107static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2108 bool SanitizeOverflow) {
2109 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2110
2111 // Try to eliminate overflow check.
2112 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2113 if (!VCI->isMinSignedValue())
2114 return EmitAbs(CGF, ArgValue, true);
2115 }
2116
2117 CodeGenFunction::SanitizerScope SanScope(&CGF);
2118
2119 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2120 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2121 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2122 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2123 Value *NotOverflow = CGF.Builder.CreateNot(
2124 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2125
2126 // TODO: support -ftrapv-handler.
2127 if (SanitizeOverflow) {
2128 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2129 SanitizerHandler::NegateOverflow,
2132 {ArgValue});
2133 } else
2134 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2135
2136 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2137 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2138}
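// Worked example for a 32-bit int argument: the lowering above computes
// 0 - x with llvm.ssub.with.overflow.i32 and selects between that difference
// and x based on x < 0. The only input for which the subtraction overflows is
// INT_MIN (-2147483648), whose magnitude is not representable, so that single
// case reaches the sanitizer handler or the trap emitted above.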
2139
2140/// Get the argument type for arguments to os_log_helper.
2141static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2142 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2143 return C.getCanonicalType(UnsignedTy);
2144}
2145
2146llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2147 const analyze_os_log::OSLogBufferLayout &Layout,
2148 CharUnits BufferAlignment) {
2149 ASTContext &Ctx = getContext();
2150
2152 {
2153 raw_svector_ostream OS(Name);
2154 OS << "__os_log_helper";
2155 OS << "_" << BufferAlignment.getQuantity();
2156 OS << "_" << int(Layout.getSummaryByte());
2157 OS << "_" << int(Layout.getNumArgsByte());
2158 for (const auto &Item : Layout.Items)
2159 OS << "_" << int(Item.getSizeByte()) << "_"
2160 << int(Item.getDescriptorByte());
2161 }
2162
2163 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2164 return F;
2165
2167 FunctionArgList Args;
2168 Args.push_back(ImplicitParamDecl::Create(
2169 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2171 ArgTys.emplace_back(Ctx.VoidPtrTy);
2172
2173 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2174 char Size = Layout.Items[I].getSizeByte();
2175 if (!Size)
2176 continue;
2177
2178 QualType ArgTy = getOSLogArgType(Ctx, Size);
2179 Args.push_back(ImplicitParamDecl::Create(
2180 Ctx, nullptr, SourceLocation(),
2181 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2183 ArgTys.emplace_back(ArgTy);
2184 }
2185
2186 QualType ReturnTy = Ctx.VoidTy;
2187
2188 // The helper function has linkonce_odr linkage to enable the linker to merge
2189 // identical functions. To ensure the merging always happens, 'noinline' is
2190 // attached to the function when compiling with -Oz.
2191 const CGFunctionInfo &FI =
2193 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2194 llvm::Function *Fn = llvm::Function::Create(
2195 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2196 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2197 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2199 Fn->setDoesNotThrow();
2200
2201 // Attach 'noinline' at -Oz.
2202 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2203 Fn->addFnAttr(llvm::Attribute::NoInline);
2204
2205 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2206 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2207
2208 // Create a scope with an artificial location for the body of this function.
2209 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2210
2211 CharUnits Offset;
2213 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2214 BufferAlignment);
2215 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2216 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2217 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2218 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2219
2220 unsigned I = 1;
2221 for (const auto &Item : Layout.Items) {
2222 Builder.CreateStore(
2223 Builder.getInt8(Item.getDescriptorByte()),
2224 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2225 Builder.CreateStore(
2226 Builder.getInt8(Item.getSizeByte()),
2227 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2228
2229 CharUnits Size = Item.size();
2230 if (!Size.getQuantity())
2231 continue;
2232
2233 Address Arg = GetAddrOfLocalVar(Args[I]);
2234 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2235 Addr = Addr.withElementType(Arg.getElementType());
2236 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2237 Offset += Size;
2238 ++I;
2239 }
2240
2241 FinishFunction();
2242
2243 return Fn;
2244}
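// Hypothetical example of the mangling produced above: a buffer aligned to 8
// bytes, summary byte 0, two arguments (a 4-byte value with descriptor 0 and
// an 8-byte value with descriptor 34) would yield a helper named
//   __os_log_helper_8_0_2_4_0_8_34
// so that identical layouts compiled in different translation units share one
// linkonce_odr definition.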
2245
2246RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2247 assert(E.getNumArgs() >= 2 &&
2248 "__builtin_os_log_format takes at least 2 arguments");
2249 ASTContext &Ctx = getContext();
2250 analyze_os_log::OSLogBufferLayout Layout;
2251 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2252 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2253 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2254
2255 // Ignore argument 1, the format string. It is not currently used.
2256 CallArgList Args;
2257 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2258
2259 for (const auto &Item : Layout.Items) {
2260 int Size = Item.getSizeByte();
2261 if (!Size)
2262 continue;
2263
2264 llvm::Value *ArgVal;
2265
2266 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2267 uint64_t Val = 0;
2268 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2269 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2270 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2271 } else if (const Expr *TheExpr = Item.getExpr()) {
2272 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2273
2274 // If a temporary object that requires destruction after the full
2275 // expression is passed, push a lifetime-extended cleanup to extend its
2276 // lifetime to the end of the enclosing block scope.
2277 auto LifetimeExtendObject = [&](const Expr *E) {
2278 E = E->IgnoreParenCasts();
2279 // Extend lifetimes of objects returned by function calls and message
2280 // sends.
2281
2282 // FIXME: We should do this in other cases in which temporaries are
2283 // created including arguments of non-ARC types (e.g., C++
2284 // temporaries).
2285 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2286 return true;
2287 return false;
2288 };
2289
2290 if (TheExpr->getType()->isObjCRetainableType() &&
2291 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2292 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2293 "Only scalar can be a ObjC retainable type");
2294 if (!isa<Constant>(ArgVal)) {
2295 CleanupKind Cleanup = getARCCleanupKind();
2296 QualType Ty = TheExpr->getType();
2297 RawAddress Alloca = RawAddress::invalid();
2298 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2299 ArgVal = EmitARCRetain(Ty, ArgVal);
2300 Builder.CreateStore(ArgVal, Addr);
2301 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2303 Cleanup & EHCleanup);
2304
2305 // Push a clang.arc.use call to ensure the ARC optimizer knows that the
2306 // argument has to be alive.
2307 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2308 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2309 }
2310 }
2311 } else {
2312 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2313 }
2314
2315 unsigned ArgValSize =
2316 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2317 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2318 ArgValSize);
2319 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2320 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2321 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2322 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2323 Args.add(RValue::get(ArgVal), ArgTy);
2324 }
2325
2326 const CGFunctionInfo &FI =
2327 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2328 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2329 Layout, BufAddr.getAlignment());
2330 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2331 return RValue::get(BufAddr, *this);
2332}
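// For illustration, a call such as
//   __builtin_os_log_format(buf, "%d: %s", i, s);
// becomes a call to the helper generated above: the buffer pointer is passed
// first, followed by each non-empty layout item widened or bit-cast to the
// unsigned integer type of its size byte. The format string itself (argument
// 1) is not passed; only the layout derived from it is used.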
2333
2334static bool isSpecialUnsignedMultiplySignedResult(
2335 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2336 WidthAndSignedness ResultInfo) {
2337 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2338 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2339 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2340}
2341
2342static RValue EmitCheckedUnsignedMultiplySignedResult(
2343 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2344 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2345 const clang::Expr *ResultArg, QualType ResultQTy,
2346 WidthAndSignedness ResultInfo) {
2347 assert(isSpecialUnsignedMultiplySignedResult(
2348 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2349 "Cannot specialize this multiply");
2350
2351 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2352 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2353
2354 llvm::Value *HasOverflow;
2355 llvm::Value *Result = EmitOverflowIntrinsic(
2356 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2357
2358 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2359 // however, since the original builtin had a signed result, we need to report
2360 // an overflow when the result is greater than INT_MAX.
2361 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2362 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2363
2364 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2365 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2366
2367 bool isVolatile =
2368 ResultArg->getType()->getPointeeType().isVolatileQualified();
2369 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2370 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2371 isVolatile);
2372 return RValue::get(HasOverflow);
2373}
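// For example, with 32-bit operands and a signed 32-bit result,
// 0x80000000u * 1u does not overflow the unsigned multiply, but it exceeds
// INT_MAX, so the extra unsigned comparison above makes the builtin report
// overflow; the (wrapped) product is still stored through the result pointer,
// matching the usual __builtin_mul_overflow contract.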
2374
2375/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2376static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2377 WidthAndSignedness Op1Info,
2378 WidthAndSignedness Op2Info,
2379 WidthAndSignedness ResultInfo) {
2380 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2381 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2382 Op1Info.Signed != Op2Info.Signed;
2383}
2384
2385/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2386/// the generic checked-binop irgen.
2387static RValue
2389 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2390 WidthAndSignedness Op2Info,
2391 const clang::Expr *ResultArg, QualType ResultQTy,
2392 WidthAndSignedness ResultInfo) {
2393 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2394 Op2Info, ResultInfo) &&
2395 "Not a mixed-sign multipliction we can specialize");
2396
2397 // Emit the signed and unsigned operands.
2398 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2399 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2400 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2401 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2402 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2403 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2404
2405 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2406 if (SignedOpWidth < UnsignedOpWidth)
2407 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2408 if (UnsignedOpWidth < SignedOpWidth)
2409 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2410
2411 llvm::Type *OpTy = Signed->getType();
2412 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2413 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2414 llvm::Type *ResTy = ResultPtr.getElementType();
2415 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2416
2417 // Take the absolute value of the signed operand.
2418 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2419 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2420 llvm::Value *AbsSigned =
2421 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2422
2423 // Perform a checked unsigned multiplication.
2424 llvm::Value *UnsignedOverflow;
2425 llvm::Value *UnsignedResult =
2426 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2427 Unsigned, UnsignedOverflow);
2428
2429 llvm::Value *Overflow, *Result;
2430 if (ResultInfo.Signed) {
2431 // Signed overflow occurs if the result is greater than INT_MAX or less
2432 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2433 auto IntMax =
2434 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2435 llvm::Value *MaxResult =
2436 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2437 CGF.Builder.CreateZExt(IsNegative, OpTy));
2438 llvm::Value *SignedOverflow =
2439 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2440 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2441
2442 // Prepare the signed result (possibly by negating it).
2443 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2444 llvm::Value *SignedResult =
2445 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2446 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2447 } else {
2448 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2449 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2450 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2451 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2452 if (ResultInfo.Width < OpWidth) {
2453 auto IntMax =
2454 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2455 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2456 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2457 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2458 }
2459
2460 // Negate the product if it would be negative in infinite precision.
2461 Result = CGF.Builder.CreateSelect(
2462 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2463
2464 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2465 }
2466 assert(Overflow && Result && "Missing overflow or result");
2467
2468 bool isVolatile =
2469 ResultArg->getType()->getPointeeType().isVolatileQualified();
2470 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2471 isVolatile);
2472 return RValue::get(Overflow);
2473}
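// Worked example: for signed a = -3 and unsigned b = 5 with a signed 32-bit
// result, the code above multiplies |a| * b = 15 with
// llvm.umul.with.overflow, compares 15 against INT_MAX + 1 (the extra unit of
// range allowed because the product will be negated), finds no overflow, and
// stores the negated product -15.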
2474
2475static bool
2476TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2477 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2478 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2479 Ty = Ctx.getBaseElementType(Arr);
2480
2481 const auto *Record = Ty->getAsCXXRecordDecl();
2482 if (!Record)
2483 return false;
2484
2485 // We've already checked this type, or are in the process of checking it.
2486 if (!Seen.insert(Record).second)
2487 return false;
2488
2489 assert(Record->hasDefinition() &&
2490 "Incomplete types should already be diagnosed");
2491
2492 if (Record->isDynamicClass())
2493 return true;
2494
2495 for (FieldDecl *F : Record->fields()) {
2496 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2497 return true;
2498 }
2499 return false;
2500}
2501
2502/// Determine if the specified type requires laundering by checking if it is a
2503/// dynamic class type or contains a subobject which is a dynamic class type.
2504static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2505 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2506 return false;
2508 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2509}
2510
2511RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2512 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2513 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2514
2515 // The builtin's shift arg may have a different type than the source arg and
2516 // result, but the LLVM intrinsic uses the same type for all values.
2517 llvm::Type *Ty = Src->getType();
2518 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2519
2520 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2521 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2522 Function *F = CGM.getIntrinsic(IID, Ty);
2523 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2524}
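// For example, __builtin_rotateleft32(x, n) becomes
//   call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
// which backends typically lower to a single rotate instruction when one is
// available.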
2525
2526// Map math builtins for long-double to f128 version.
2527static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2528 switch (BuiltinID) {
2529#define MUTATE_LDBL(func) \
2530 case Builtin::BI__builtin_##func##l: \
2531 return Builtin::BI__builtin_##func##f128;
2562 MUTATE_LDBL(nans)
2563 MUTATE_LDBL(inf)
2582 MUTATE_LDBL(huge_val)
2592#undef MUTATE_LDBL
2593 default:
2594 return BuiltinID;
2595 }
2596}
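// For example, on a PPC64 target whose long double is IEEE 128-bit (see the
// caller below), __builtin_infl is remapped to __builtin_inff128 and
// __builtin_huge_vall to __builtin_huge_valf128 before any further lowering.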
2597
2598static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2599 Value *V) {
2600 if (CGF.Builder.getIsFPConstrained() &&
2601 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2602 if (Value *Result =
2603 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2604 return Result;
2605 }
2606 return nullptr;
2607}
2608
2609static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2610 const FunctionDecl *FD) {
2611 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2612 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2613 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2614
2616 for (auto &&FormalTy : FnTy->params())
2617 Args.push_back(llvm::PoisonValue::get(FormalTy));
2618
2619 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2620}
2621
2622RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2623 const CallExpr *E,
2624 ReturnValueSlot ReturnValue) {
2625 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2626 // See if we can constant fold this builtin. If so, don't emit it at all.
2627 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2628 Expr::EvalResult Result;
2629 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2630 !Result.hasSideEffects()) {
2631 if (Result.Val.isInt())
2632 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2633 Result.Val.getInt()));
2634 if (Result.Val.isFloat())
2635 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2636 Result.Val.getFloat()));
2637 }
2638
2639 // If the current long-double semantics is IEEE 128-bit, replace math builtins
2640 // of long-double with their f128 equivalents.
2641 // TODO: This mutation should also be applied to targets other than PPC,
2642 // once their backends support IEEE 128-bit style libcalls.
2643 if (getTarget().getTriple().isPPC64() &&
2644 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2645 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2646
2647 // If the builtin has been declared explicitly with an assembler label,
2648 // disable the specialized emitting below. Ideally we should communicate the
2649 // rename in IR, or at least avoid generating the intrinsic calls that are
2650 // likely to get lowered to the renamed library functions.
2651 const unsigned BuiltinIDIfNoAsmLabel =
2652 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2653
2654 std::optional<bool> ErrnoOverriden;
2655 // ErrnoOverriden is true if math-errno is overridden via the
2656 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2657 // which implies math-errno.
2658 if (E->hasStoredFPFeatures()) {
2659 FPOptionsOverride OP = E->getFPFeatures();
2660 if (OP.hasMathErrnoOverride())
2661 ErrnoOverriden = OP.getMathErrnoOverride();
2662 }
2663 // True if '__attribute__((optnone))' is used. This attribute overrides
2664 // fast-math, which implies math-errno.
2665 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2666
2667 // True if we are compiling at -O2 and errno has been disabled
2668 // using the '#pragma float_control(precise, off)', and
2669 // attribute opt-none hasn't been seen.
2670 bool ErrnoOverridenToFalseWithOpt =
2671 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2672 CGM.getCodeGenOpts().OptimizationLevel != 0;
2673
2674 // There are LLVM math intrinsics/instructions corresponding to math library
2675 // functions, except that the LLVM op will never set errno while the math library
2676 // might. Also, math builtins have the same semantics as their math library
2677 // twins. Thus, we can transform math library and builtin calls to their
2678 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2679 // In case FP exceptions are enabled, the experimental versions of the
2680 // intrinsics model those.
2681 bool ConstAlways =
2682 getContext().BuiltinInfo.isConst(BuiltinID);
2683
2684 // There's a special case with the fma builtins where they are always const
2685 // if the target environment is GNU or the target OS is Windows and we're
2686 // targeting the MSVCRT.dll environment.
2687 // FIXME: This list can become outdated. Need to find a way to get it some
2688 // other way.
2689 switch (BuiltinID) {
2690 case Builtin::BI__builtin_fma:
2691 case Builtin::BI__builtin_fmaf:
2692 case Builtin::BI__builtin_fmal:
2693 case Builtin::BIfma:
2694 case Builtin::BIfmaf:
2695 case Builtin::BIfmal: {
2696 auto &Trip = CGM.getTriple();
2697 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2698 ConstAlways = true;
2699 break;
2700 }
2701 default:
2702 break;
2703 }
2704
2705 bool ConstWithoutErrnoAndExceptions =
2706 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2707 bool ConstWithoutExceptions =
2708 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2709
2710 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2711 // disabled.
2712 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2713 // or attributes that affect math-errno should prevent or allow math
2714 // intrinsics to be generated. Intrinsics are generated:
2715 // 1- In fast math mode, unless math-errno is overridden
2716 // via '#pragma float_control(precise, on)', or via an
2717 // '__attribute__((optnone))'.
2718 // 2- If math-errno was enabled on the command line but overridden
2719 // to false via '#pragma float_control(precise, off)' and
2720 // '__attribute__((optnone))' hasn't been used.
2721 // 3- If we are compiling with optimization and errno has been disabled
2722 // via '#pragma float_control(precise, off)', and
2723 // '__attribute__((optnone))' hasn't been used.
2724
2725 bool ConstWithoutErrnoOrExceptions =
2726 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2727 bool GenerateIntrinsics =
2728 (ConstAlways && !OptNone) ||
2729 (!getLangOpts().MathErrno &&
2730 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2731 if (!GenerateIntrinsics) {
2732 GenerateIntrinsics =
2733 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2734 if (!GenerateIntrinsics)
2735 GenerateIntrinsics =
2736 ConstWithoutErrnoOrExceptions &&
2737 (!getLangOpts().MathErrno &&
2738 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2739 if (!GenerateIntrinsics)
2740 GenerateIntrinsics =
2741 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2742 }
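  // For instance, under -O2 -fno-math-errno (and no overriding pragma or
  // optnone attribute) a call to cos(x) is emitted as llvm.cos.f64 (or its
  // constrained counterpart under strict FP), whereas with math-errno left
  // enabled the plain libm call is emitted so errno can still be set.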
2743 if (GenerateIntrinsics) {
2744 switch (BuiltinIDIfNoAsmLabel) {
2745 case Builtin::BIceil:
2746 case Builtin::BIceilf:
2747 case Builtin::BIceill:
2748 case Builtin::BI__builtin_ceil:
2749 case Builtin::BI__builtin_ceilf:
2750 case Builtin::BI__builtin_ceilf16:
2751 case Builtin::BI__builtin_ceill:
2752 case Builtin::BI__builtin_ceilf128:
2754 Intrinsic::ceil,
2755 Intrinsic::experimental_constrained_ceil));
2756
2757 case Builtin::BIcopysign:
2758 case Builtin::BIcopysignf:
2759 case Builtin::BIcopysignl:
2760 case Builtin::BI__builtin_copysign:
2761 case Builtin::BI__builtin_copysignf:
2762 case Builtin::BI__builtin_copysignf16:
2763 case Builtin::BI__builtin_copysignl:
2764 case Builtin::BI__builtin_copysignf128:
2765 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2766
2767 case Builtin::BIcos:
2768 case Builtin::BIcosf:
2769 case Builtin::BIcosl:
2770 case Builtin::BI__builtin_cos:
2771 case Builtin::BI__builtin_cosf:
2772 case Builtin::BI__builtin_cosf16:
2773 case Builtin::BI__builtin_cosl:
2774 case Builtin::BI__builtin_cosf128:
2776 Intrinsic::cos,
2777 Intrinsic::experimental_constrained_cos));
2778
2779 case Builtin::BIexp:
2780 case Builtin::BIexpf:
2781 case Builtin::BIexpl:
2782 case Builtin::BI__builtin_exp:
2783 case Builtin::BI__builtin_expf:
2784 case Builtin::BI__builtin_expf16:
2785 case Builtin::BI__builtin_expl:
2786 case Builtin::BI__builtin_expf128:
2788 Intrinsic::exp,
2789 Intrinsic::experimental_constrained_exp));
2790
2791 case Builtin::BIexp2:
2792 case Builtin::BIexp2f:
2793 case Builtin::BIexp2l:
2794 case Builtin::BI__builtin_exp2:
2795 case Builtin::BI__builtin_exp2f:
2796 case Builtin::BI__builtin_exp2f16:
2797 case Builtin::BI__builtin_exp2l:
2798 case Builtin::BI__builtin_exp2f128:
2800 Intrinsic::exp2,
2801 Intrinsic::experimental_constrained_exp2));
2802 case Builtin::BI__builtin_exp10:
2803 case Builtin::BI__builtin_exp10f:
2804 case Builtin::BI__builtin_exp10f16:
2805 case Builtin::BI__builtin_exp10l:
2806 case Builtin::BI__builtin_exp10f128: {
2807 // TODO: strictfp support
2808 if (Builder.getIsFPConstrained())
2809 break;
2810 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2811 }
2812 case Builtin::BIfabs:
2813 case Builtin::BIfabsf:
2814 case Builtin::BIfabsl:
2815 case Builtin::BI__builtin_fabs:
2816 case Builtin::BI__builtin_fabsf:
2817 case Builtin::BI__builtin_fabsf16:
2818 case Builtin::BI__builtin_fabsl:
2819 case Builtin::BI__builtin_fabsf128:
2820 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2821
2822 case Builtin::BIfloor:
2823 case Builtin::BIfloorf:
2824 case Builtin::BIfloorl:
2825 case Builtin::BI__builtin_floor:
2826 case Builtin::BI__builtin_floorf:
2827 case Builtin::BI__builtin_floorf16:
2828 case Builtin::BI__builtin_floorl:
2829 case Builtin::BI__builtin_floorf128:
2831 Intrinsic::floor,
2832 Intrinsic::experimental_constrained_floor));
2833
2834 case Builtin::BIfma:
2835 case Builtin::BIfmaf:
2836 case Builtin::BIfmal:
2837 case Builtin::BI__builtin_fma:
2838 case Builtin::BI__builtin_fmaf:
2839 case Builtin::BI__builtin_fmaf16:
2840 case Builtin::BI__builtin_fmal:
2841 case Builtin::BI__builtin_fmaf128:
2843 Intrinsic::fma,
2844 Intrinsic::experimental_constrained_fma));
2845
2846 case Builtin::BIfmax:
2847 case Builtin::BIfmaxf:
2848 case Builtin::BIfmaxl:
2849 case Builtin::BI__builtin_fmax:
2850 case Builtin::BI__builtin_fmaxf:
2851 case Builtin::BI__builtin_fmaxf16:
2852 case Builtin::BI__builtin_fmaxl:
2853 case Builtin::BI__builtin_fmaxf128:
2855 Intrinsic::maxnum,
2856 Intrinsic::experimental_constrained_maxnum));
2857
2858 case Builtin::BIfmin:
2859 case Builtin::BIfminf:
2860 case Builtin::BIfminl:
2861 case Builtin::BI__builtin_fmin:
2862 case Builtin::BI__builtin_fminf:
2863 case Builtin::BI__builtin_fminf16:
2864 case Builtin::BI__builtin_fminl:
2865 case Builtin::BI__builtin_fminf128:
2867 Intrinsic::minnum,
2868 Intrinsic::experimental_constrained_minnum));
2869
2870 // fmod() is a special case. It maps to the frem instruction rather than an
2871 // LLVM intrinsic.
2872 case Builtin::BIfmod:
2873 case Builtin::BIfmodf:
2874 case Builtin::BIfmodl:
2875 case Builtin::BI__builtin_fmod:
2876 case Builtin::BI__builtin_fmodf:
2877 case Builtin::BI__builtin_fmodf16:
2878 case Builtin::BI__builtin_fmodl:
2879 case Builtin::BI__builtin_fmodf128: {
2880 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2881 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2882 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2883 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2884 }
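  // For example, fmod(x, y) on doubles becomes
  //   %fmod = frem double %x, %y
  // with the pragma FP state applied via the CGFPOptionsRAII above.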
2885
2886 case Builtin::BIlog:
2887 case Builtin::BIlogf:
2888 case Builtin::BIlogl:
2889 case Builtin::BI__builtin_log:
2890 case Builtin::BI__builtin_logf:
2891 case Builtin::BI__builtin_logf16:
2892 case Builtin::BI__builtin_logl:
2893 case Builtin::BI__builtin_logf128:
2895 Intrinsic::log,
2896 Intrinsic::experimental_constrained_log));
2897
2898 case Builtin::BIlog10:
2899 case Builtin::BIlog10f:
2900 case Builtin::BIlog10l:
2901 case Builtin::BI__builtin_log10:
2902 case Builtin::BI__builtin_log10f:
2903 case Builtin::BI__builtin_log10f16:
2904 case Builtin::BI__builtin_log10l:
2905 case Builtin::BI__builtin_log10f128:
2907 Intrinsic::log10,
2908 Intrinsic::experimental_constrained_log10));
2909
2910 case Builtin::BIlog2:
2911 case Builtin::BIlog2f:
2912 case Builtin::BIlog2l:
2913 case Builtin::BI__builtin_log2:
2914 case Builtin::BI__builtin_log2f:
2915 case Builtin::BI__builtin_log2f16:
2916 case Builtin::BI__builtin_log2l:
2917 case Builtin::BI__builtin_log2f128:
2919 Intrinsic::log2,
2920 Intrinsic::experimental_constrained_log2));
2921
2922 case Builtin::BInearbyint:
2923 case Builtin::BInearbyintf:
2924 case Builtin::BInearbyintl:
2925 case Builtin::BI__builtin_nearbyint:
2926 case Builtin::BI__builtin_nearbyintf:
2927 case Builtin::BI__builtin_nearbyintl:
2928 case Builtin::BI__builtin_nearbyintf128:
2930 Intrinsic::nearbyint,
2931 Intrinsic::experimental_constrained_nearbyint));
2932
2933 case Builtin::BIpow:
2934 case Builtin::BIpowf:
2935 case Builtin::BIpowl:
2936 case Builtin::BI__builtin_pow:
2937 case Builtin::BI__builtin_powf:
2938 case Builtin::BI__builtin_powf16:
2939 case Builtin::BI__builtin_powl:
2940 case Builtin::BI__builtin_powf128:
2942 Intrinsic::pow,
2943 Intrinsic::experimental_constrained_pow));
2944
2945 case Builtin::BIrint:
2946 case Builtin::BIrintf:
2947 case Builtin::BIrintl:
2948 case Builtin::BI__builtin_rint:
2949 case Builtin::BI__builtin_rintf:
2950 case Builtin::BI__builtin_rintf16:
2951 case Builtin::BI__builtin_rintl:
2952 case Builtin::BI__builtin_rintf128:
2954 Intrinsic::rint,
2955 Intrinsic::experimental_constrained_rint));
2956
2957 case Builtin::BIround:
2958 case Builtin::BIroundf:
2959 case Builtin::BIroundl:
2960 case Builtin::BI__builtin_round:
2961 case Builtin::BI__builtin_roundf:
2962 case Builtin::BI__builtin_roundf16:
2963 case Builtin::BI__builtin_roundl:
2964 case Builtin::BI__builtin_roundf128:
2966 Intrinsic::round,
2967 Intrinsic::experimental_constrained_round));
2968
2969 case Builtin::BIroundeven:
2970 case Builtin::BIroundevenf:
2971 case Builtin::BIroundevenl:
2972 case Builtin::BI__builtin_roundeven:
2973 case Builtin::BI__builtin_roundevenf:
2974 case Builtin::BI__builtin_roundevenf16:
2975 case Builtin::BI__builtin_roundevenl:
2976 case Builtin::BI__builtin_roundevenf128:
2978 Intrinsic::roundeven,
2979 Intrinsic::experimental_constrained_roundeven));
2980
2981 case Builtin::BIsin:
2982 case Builtin::BIsinf:
2983 case Builtin::BIsinl:
2984 case Builtin::BI__builtin_sin:
2985 case Builtin::BI__builtin_sinf:
2986 case Builtin::BI__builtin_sinf16:
2987 case Builtin::BI__builtin_sinl:
2988 case Builtin::BI__builtin_sinf128:
2990 Intrinsic::sin,
2991 Intrinsic::experimental_constrained_sin));
2992
2993 case Builtin::BIsqrt:
2994 case Builtin::BIsqrtf:
2995 case Builtin::BIsqrtl:
2996 case Builtin::BI__builtin_sqrt:
2997 case Builtin::BI__builtin_sqrtf:
2998 case Builtin::BI__builtin_sqrtf16:
2999 case Builtin::BI__builtin_sqrtl:
3000 case Builtin::BI__builtin_sqrtf128:
3001 case Builtin::BI__builtin_elementwise_sqrt: {
3002 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3003 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3004 SetSqrtFPAccuracy(Call);
3005 return RValue::get(Call);
3006 }
3007 case Builtin::BItrunc:
3008 case Builtin::BItruncf:
3009 case Builtin::BItruncl:
3010 case Builtin::BI__builtin_trunc:
3011 case Builtin::BI__builtin_truncf:
3012 case Builtin::BI__builtin_truncf16:
3013 case Builtin::BI__builtin_truncl:
3014 case Builtin::BI__builtin_truncf128:
3016 Intrinsic::trunc,
3017 Intrinsic::experimental_constrained_trunc));
3018
3019 case Builtin::BIlround:
3020 case Builtin::BIlroundf:
3021 case Builtin::BIlroundl:
3022 case Builtin::BI__builtin_lround:
3023 case Builtin::BI__builtin_lroundf:
3024 case Builtin::BI__builtin_lroundl:
3025 case Builtin::BI__builtin_lroundf128:
3027 *this, E, Intrinsic::lround,
3028 Intrinsic::experimental_constrained_lround));
3029
3030 case Builtin::BIllround:
3031 case Builtin::BIllroundf:
3032 case Builtin::BIllroundl:
3033 case Builtin::BI__builtin_llround:
3034 case Builtin::BI__builtin_llroundf:
3035 case Builtin::BI__builtin_llroundl:
3036 case Builtin::BI__builtin_llroundf128:
3038 *this, E, Intrinsic::llround,
3039 Intrinsic::experimental_constrained_llround));
3040
3041 case Builtin::BIlrint:
3042 case Builtin::BIlrintf:
3043 case Builtin::BIlrintl:
3044 case Builtin::BI__builtin_lrint:
3045 case Builtin::BI__builtin_lrintf:
3046 case Builtin::BI__builtin_lrintl:
3047 case Builtin::BI__builtin_lrintf128:
3049 *this, E, Intrinsic::lrint,
3050 Intrinsic::experimental_constrained_lrint));
3051
3052 case Builtin::BIllrint:
3053 case Builtin::BIllrintf:
3054 case Builtin::BIllrintl:
3055 case Builtin::BI__builtin_llrint:
3056 case Builtin::BI__builtin_llrintf:
3057 case Builtin::BI__builtin_llrintl:
3058 case Builtin::BI__builtin_llrintf128:
3060 *this, E, Intrinsic::llrint,
3061 Intrinsic::experimental_constrained_llrint));
3062 case Builtin::BI__builtin_ldexp:
3063 case Builtin::BI__builtin_ldexpf:
3064 case Builtin::BI__builtin_ldexpl:
3065 case Builtin::BI__builtin_ldexpf16:
3066 case Builtin::BI__builtin_ldexpf128: {
3067 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3068 *this, E, Intrinsic::ldexp,
3069 Intrinsic::experimental_constrained_ldexp));
3070 }
3071 default:
3072 break;
3073 }
3074 }
3075
3076 // Check NonnullAttribute/NullabilityArg and Alignment.
3077 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3078 unsigned ParmNum) {
3079 Value *Val = A.emitRawPointer(*this);
3080 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3081 ParmNum);
3082
3083 if (SanOpts.has(SanitizerKind::Alignment)) {
3084 SanitizerSet SkippedChecks;
3085 SkippedChecks.set(SanitizerKind::All);
3086 SkippedChecks.clear(SanitizerKind::Alignment);
3087 SourceLocation Loc = Arg->getExprLoc();
3088 // Strip an implicit cast.
3089 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3090 if (CE->getCastKind() == CK_BitCast)
3091 Arg = CE->getSubExpr();
3092 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3093 SkippedChecks);
3094 }
3095 };
3096
3097 switch (BuiltinIDIfNoAsmLabel) {
3098 default: break;
3099 case Builtin::BI__builtin___CFStringMakeConstantString:
3100 case Builtin::BI__builtin___NSStringMakeConstantString:
3101 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3102 case Builtin::BI__builtin_stdarg_start:
3103 case Builtin::BI__builtin_va_start:
3104 case Builtin::BI__va_start:
3105 case Builtin::BI__builtin_va_end:
3106 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3107 ? EmitScalarExpr(E->getArg(0))
3108 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3109 BuiltinID != Builtin::BI__builtin_va_end);
3110 return RValue::get(nullptr);
3111 case Builtin::BI__builtin_va_copy: {
3112 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3113 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3114 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3115 {DstPtr, SrcPtr});
3116 return RValue::get(nullptr);
3117 }
3118 case Builtin::BIabs:
3119 case Builtin::BIlabs:
3120 case Builtin::BIllabs:
3121 case Builtin::BI__builtin_abs:
3122 case Builtin::BI__builtin_labs:
3123 case Builtin::BI__builtin_llabs: {
3124 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3125
3126 Value *Result;
3127 switch (getLangOpts().getSignedOverflowBehavior()) {
3128 case LangOptions::SOB_Defined:
3129 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3130 break;
3131 case LangOptions::SOB_Undefined:
3132 if (!SanitizeOverflow) {
3133 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3134 break;
3135 }
3136 [[fallthrough]];
3137 case LangOptions::SOB_Trapping:
3138 // TODO: Somehow handle the corner case when the address of abs is taken.
3139 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3140 break;
3141 }
3142 return RValue::get(Result);
3143 }
3144 case Builtin::BI__builtin_complex: {
3145 Value *Real = EmitScalarExpr(E->getArg(0));
3146 Value *Imag = EmitScalarExpr(E->getArg(1));
3147 return RValue::getComplex({Real, Imag});
3148 }
3149 case Builtin::BI__builtin_conj:
3150 case Builtin::BI__builtin_conjf:
3151 case Builtin::BI__builtin_conjl:
3152 case Builtin::BIconj:
3153 case Builtin::BIconjf:
3154 case Builtin::BIconjl: {
3155 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3156 Value *Real = ComplexVal.first;
3157 Value *Imag = ComplexVal.second;
3158 Imag = Builder.CreateFNeg(Imag, "neg");
3159 return RValue::getComplex(std::make_pair(Real, Imag));
3160 }
3161 case Builtin::BI__builtin_creal:
3162 case Builtin::BI__builtin_crealf:
3163 case Builtin::BI__builtin_creall:
3164 case Builtin::BIcreal:
3165 case Builtin::BIcrealf:
3166 case Builtin::BIcreall: {
3167 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3168 return RValue::get(ComplexVal.first);
3169 }
3170
3171 case Builtin::BI__builtin_preserve_access_index: {
3172 // Only enable the preserved access index region when debug info
3173 // is available, as debug info is needed to preserve the user-level
3174 // access pattern.
3175 if (!getDebugInfo()) {
3176 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3177 return RValue::get(EmitScalarExpr(E->getArg(0)));
3178 }
3179
3180 // Nested builtin_preserve_access_index() not supported
3181 if (IsInPreservedAIRegion) {
3182 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3183 return RValue::get(EmitScalarExpr(E->getArg(0)));
3184 }
3185
3186 IsInPreservedAIRegion = true;
3187 Value *Res = EmitScalarExpr(E->getArg(0));
3188 IsInPreservedAIRegion = false;
3189 return RValue::get(Res);
3190 }
3191
3192 case Builtin::BI__builtin_cimag:
3193 case Builtin::BI__builtin_cimagf:
3194 case Builtin::BI__builtin_cimagl:
3195 case Builtin::BIcimag:
3196 case Builtin::BIcimagf:
3197 case Builtin::BIcimagl: {
3198 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3199 return RValue::get(ComplexVal.second);
3200 }
3201
3202 case Builtin::BI__builtin_clrsb:
3203 case Builtin::BI__builtin_clrsbl:
3204 case Builtin::BI__builtin_clrsbll: {
3205 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3206 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3207
3208 llvm::Type *ArgType = ArgValue->getType();
3209 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3210
3211 llvm::Type *ResultType = ConvertType(E->getType());
3212 Value *Zero = llvm::Constant::getNullValue(ArgType);
3213 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3214 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3215 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3216 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3217 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3218 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3219 "cast");
3220 return RValue::get(Result);
3221 }
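// Illustrative note (not from the upstream source): as a worked example of
// the expansion above, for a 32-bit int __builtin_clrsb(1) yields
// ctlz(1) - 1 == 30, and __builtin_clrsb(-1) yields ctlz(~-1) - 1 ==
// ctlz(0) - 1 == 31, i.e. the number of redundant sign bits below the MSB.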
3222 case Builtin::BI__builtin_ctzs:
3223 case Builtin::BI__builtin_ctz:
3224 case Builtin::BI__builtin_ctzl:
3225 case Builtin::BI__builtin_ctzll:
3226 case Builtin::BI__builtin_ctzg: {
3227 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3228 E->getNumArgs() > 1;
3229
3230 Value *ArgValue =
3231 HasFallback ? EmitScalarExpr(E->getArg(0))
3232 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3233
3234 llvm::Type *ArgType = ArgValue->getType();
3235 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3236
3237 llvm::Type *ResultType = ConvertType(E->getType());
3238 Value *ZeroUndef =
3239 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3240 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3241 if (Result->getType() != ResultType)
3242 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3243 "cast");
3244 if (!HasFallback)
3245 return RValue::get(Result);
3246
3247 Value *Zero = Constant::getNullValue(ArgType);
3248 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3249 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3250 Value *ResultOrFallback =
3251 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3252 return RValue::get(ResultOrFallback);
3253 }
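// Illustrative note (not from the upstream source): when the optional second
// argument is present, e.g. __builtin_ctzg(0u, 32), the select above returns
// the fallback value 32 for a zero input; without it, __builtin_ctzg(8u)
// simply lowers to cttz(8) == 3.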
3254 case Builtin::BI__builtin_clzs:
3255 case Builtin::BI__builtin_clz:
3256 case Builtin::BI__builtin_clzl:
3257 case Builtin::BI__builtin_clzll:
3258 case Builtin::BI__builtin_clzg: {
3259 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3260 E->getNumArgs() > 1;
3261
3262 Value *ArgValue =
3263 HasFallback ? EmitScalarExpr(E->getArg(0))
3264 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3265
3266 llvm::Type *ArgType = ArgValue->getType();
3267 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3268
3269 llvm::Type *ResultType = ConvertType(E->getType());
3270 Value *ZeroUndef =
3271 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3272 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3273 if (Result->getType() != ResultType)
3274 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3275 "cast");
3276 if (!HasFallback)
3277 return RValue::get(Result);
3278
3279 Value *Zero = Constant::getNullValue(ArgType);
3280 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3281 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3282 Value *ResultOrFallback =
3283 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3284 return RValue::get(ResultOrFallback);
3285 }
3286 case Builtin::BI__builtin_ffs:
3287 case Builtin::BI__builtin_ffsl:
3288 case Builtin::BI__builtin_ffsll: {
3289 // ffs(x) -> x ? cttz(x) + 1 : 0
3290 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3291
3292 llvm::Type *ArgType = ArgValue->getType();
3293 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3294
3295 llvm::Type *ResultType = ConvertType(E->getType());
3296 Value *Tmp =
3297 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3298 llvm::ConstantInt::get(ArgType, 1));
3299 Value *Zero = llvm::Constant::getNullValue(ArgType);
3300 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3301 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3302 if (Result->getType() != ResultType)
3303 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3304 "cast");
3305 return RValue::get(Result);
3306 }
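// Illustrative note (not from the upstream source): per the expansion above,
// __builtin_ffs(0) == 0 and __builtin_ffs(8) == 4 (cttz(8) == 3, plus one for
// the 1-based bit index).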
3307 case Builtin::BI__builtin_parity:
3308 case Builtin::BI__builtin_parityl:
3309 case Builtin::BI__builtin_parityll: {
3310 // parity(x) -> ctpop(x) & 1
3311 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3312
3313 llvm::Type *ArgType = ArgValue->getType();
3314 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3315
3316 llvm::Type *ResultType = ConvertType(E->getType());
3317 Value *Tmp = Builder.CreateCall(F, ArgValue);
3318 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3319 if (Result->getType() != ResultType)
3320 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3321 "cast");
3322 return RValue::get(Result);
3323 }
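// Illustrative note (not from the upstream source): e.g.
// __builtin_parity(0b1011u) == 1 because ctpop(0b1011) == 3 and 3 & 1 == 1.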
3324 case Builtin::BI__lzcnt16:
3325 case Builtin::BI__lzcnt:
3326 case Builtin::BI__lzcnt64: {
3327 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3328
3329 llvm::Type *ArgType = ArgValue->getType();
3330 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3331
3332 llvm::Type *ResultType = ConvertType(E->getType());
3333 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3334 if (Result->getType() != ResultType)
3335 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3336 "cast");
3337 return RValue::get(Result);
3338 }
3339 case Builtin::BI__popcnt16:
3340 case Builtin::BI__popcnt:
3341 case Builtin::BI__popcnt64:
3342 case Builtin::BI__builtin_popcount:
3343 case Builtin::BI__builtin_popcountl:
3344 case Builtin::BI__builtin_popcountll:
3345 case Builtin::BI__builtin_popcountg: {
3346 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3347
3348 llvm::Type *ArgType = ArgValue->getType();
3349 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3350
3351 llvm::Type *ResultType = ConvertType(E->getType());
3352 Value *Result = Builder.CreateCall(F, ArgValue);
3353 if (Result->getType() != ResultType)
3354 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3355 "cast");
3356 return RValue::get(Result);
3357 }
3358 case Builtin::BI__builtin_unpredictable: {
3359 // Always return the argument of __builtin_unpredictable. LLVM does not
3360 // handle this builtin. Metadata for this builtin should be added directly
3361 // to instructions such as branches or switches that use it.
3362 return RValue::get(EmitScalarExpr(E->getArg(0)));
3363 }
3364 case Builtin::BI__builtin_expect: {
3365 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3366 llvm::Type *ArgType = ArgValue->getType();
3367
3368 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3369 // Don't generate llvm.expect on -O0 as the backend won't use it for
3370 // anything.
3371 // Note, we still IRGen ExpectedValue because it could have side-effects.
3372 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3373 return RValue::get(ArgValue);
3374
3375 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3376 Value *Result =
3377 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3378 return RValue::get(Result);
3379 }
3380 case Builtin::BI__builtin_expect_with_probability: {
3381 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3382 llvm::Type *ArgType = ArgValue->getType();
3383
3384 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3385 llvm::APFloat Probability(0.0);
3386 const Expr *ProbArg = E->getArg(2);
3387 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3388 assert(EvalSucceed && "probability should be able to evaluate as float");
3389 (void)EvalSucceed;
3390 bool LoseInfo = false;
3391 Probability.convert(llvm::APFloat::IEEEdouble(),
3392 llvm::RoundingMode::Dynamic, &LoseInfo);
3393 llvm::Type *Ty = ConvertType(ProbArg->getType());
3394 Constant *Confidence = ConstantFP::get(Ty, Probability);
3395 // Don't generate llvm.expect.with.probability on -O0 as the backend
3396 // won't use it for anything.
3397 // Note, we still IRGen ExpectedValue because it could have side-effects.
3398 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3399 return RValue::get(ArgValue);
3400
3401 Function *FnExpect =
3402 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3403 Value *Result = Builder.CreateCall(
3404 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3405 return RValue::get(Result);
3406 }
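// Illustrative note (not from the upstream source): a typical call is
// __builtin_expect_with_probability(x == 0, 1, 0.9); the third argument must
// fold to a constant (see the assert on EvaluateAsFloat above) and is
// converted to IEEE double before being fed to llvm.expect.with.probability.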
3407 case Builtin::BI__builtin_assume_aligned: {
3408 const Expr *Ptr = E->getArg(0);
3409 Value *PtrValue = EmitScalarExpr(Ptr);
3410 Value *OffsetValue =
3411 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3412
3413 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3414 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3415 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3416 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3417 llvm::Value::MaximumAlignment);
3418
3419 emitAlignmentAssumption(PtrValue, Ptr,
3420 /*The expr loc is sufficient.*/ SourceLocation(),
3421 AlignmentCI, OffsetValue);
3422 return RValue::get(PtrValue);
3423 }
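// Illustrative note (not from the upstream source): a call such as
// p = __builtin_assume_aligned(p, 64) returns its pointer operand unchanged;
// the alignment (clamped above to llvm::Value::MaximumAlignment) and the
// optional offset only feed the alignment assumption emitted by
// emitAlignmentAssumption.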
3424 case Builtin::BI__assume:
3425 case Builtin::BI__builtin_assume: {
3426 if (E->getArg(0)->HasSideEffects(getContext()))
3427 return RValue::get(nullptr);
3428
3429 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3430 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3431 Builder.CreateCall(FnAssume, ArgValue);
3432 return RValue::get(nullptr);
3433 }
3434 case Builtin::BI__builtin_assume_separate_storage: {
3435 const Expr *Arg0 = E->getArg(0);
3436 const Expr *Arg1 = E->getArg(1);
3437
3438 Value *Value0 = EmitScalarExpr(Arg0);
3439 Value *Value1 = EmitScalarExpr(Arg1);
3440
3441 Value *Values[] = {Value0, Value1};
3442 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3443 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3444 return RValue::get(nullptr);
3445 }
3446 case Builtin::BI__builtin_allow_runtime_check: {
3447 StringRef Kind =
3448 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3449 LLVMContext &Ctx = CGM.getLLVMContext();
3450 llvm::Value *Allow = Builder.CreateCall(
3451 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3452 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3453 return RValue::get(Allow);
3454 }
3455 case Builtin::BI__arithmetic_fence: {
3456 // Create the builtin call if FastMath is selected and the target
3457 // supports the builtin; otherwise just return the argument.
3458 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3459 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3460 bool isArithmeticFenceEnabled =
3461 FMF.allowReassoc() &&
3462 getContext().getTargetInfo().checkArithmeticFenceSupported();
3463 QualType ArgType = E->getArg(0)->getType();
3464 if (ArgType->isComplexType()) {
3465 if (isArithmeticFenceEnabled) {
3466 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3467 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3468 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3469 ConvertType(ElementType));
3470 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3471 ConvertType(ElementType));
3472 return RValue::getComplex(std::make_pair(Real, Imag));
3473 }
3474 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3475 Value *Real = ComplexVal.first;
3476 Value *Imag = ComplexVal.second;
3477 return RValue::getComplex(std::make_pair(Real, Imag));
3478 }
3479 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3480 if (isArithmeticFenceEnabled)
3481 return RValue::get(
3482 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3483 return RValue::get(ArgValue);
3484 }
3485 case Builtin::BI__builtin_bswap16:
3486 case Builtin::BI__builtin_bswap32:
3487 case Builtin::BI__builtin_bswap64:
3488 case Builtin::BI_byteswap_ushort:
3489 case Builtin::BI_byteswap_ulong:
3490 case Builtin::BI_byteswap_uint64: {
3491 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3492 }
3493 case Builtin::BI__builtin_bitreverse8:
3494 case Builtin::BI__builtin_bitreverse16:
3495 case Builtin::BI__builtin_bitreverse32:
3496 case Builtin::BI__builtin_bitreverse64: {
3497 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3498 }
3499 case Builtin::BI__builtin_rotateleft8:
3500 case Builtin::BI__builtin_rotateleft16:
3501 case Builtin::BI__builtin_rotateleft32:
3502 case Builtin::BI__builtin_rotateleft64:
3503 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3504 case Builtin::BI_rotl16:
3505 case Builtin::BI_rotl:
3506 case Builtin::BI_lrotl:
3507 case Builtin::BI_rotl64:
3508 return emitRotate(E, false);
3509
3510 case Builtin::BI__builtin_rotateright8:
3511 case Builtin::BI__builtin_rotateright16:
3512 case Builtin::BI__builtin_rotateright32:
3513 case Builtin::BI__builtin_rotateright64:
3514 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3515 case Builtin::BI_rotr16:
3516 case Builtin::BI_rotr:
3517 case Builtin::BI_lrotr:
3518 case Builtin::BI_rotr64:
3519 return emitRotate(E, true);
3520
3521 case Builtin::BI__builtin_constant_p: {
3522 llvm::Type *ResultType = ConvertType(E->getType());
3523
3524 const Expr *Arg = E->getArg(0);
3525 QualType ArgType = Arg->getType();
3526 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3527 // and likely a mistake.
3528 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3529 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3530 // Per the GCC documentation, only numeric constants are recognized after
3531 // inlining.
3532 return RValue::get(ConstantInt::get(ResultType, 0));
3533
3534 if (Arg->HasSideEffects(getContext()))
3535 // The argument is unevaluated, so be conservative if it might have
3536 // side-effects.
3537 return RValue::get(ConstantInt::get(ResultType, 0));
3538
3539 Value *ArgValue = EmitScalarExpr(Arg);
3540 if (ArgType->isObjCObjectPointerType()) {
3541 // Convert Objective-C objects to id because we cannot distinguish between
3542 // LLVM types for Obj-C classes as they are opaque.
3543 ArgType = CGM.getContext().getObjCIdType();
3544 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3545 }
3546 Function *F =
3547 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3548 Value *Result = Builder.CreateCall(F, ArgValue);
3549 if (Result->getType() != ResultType)
3550 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3551 return RValue::get(Result);
3552 }
3553 case Builtin::BI__builtin_dynamic_object_size:
3554 case Builtin::BI__builtin_object_size: {
3555 unsigned Type =
3556 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3557 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3558
3559 // We pass this builtin onto the optimizer so that it can figure out the
3560 // object size in more complex cases.
3561 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3562 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3563 /*EmittedE=*/nullptr, IsDynamic));
3564 }
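// Illustrative note (not from the upstream source): the second argument is
// the GCC-style "type" flag (0-3), e.g. __builtin_object_size(p, 0) vs.
// __builtin_object_size(p, 2); the dynamic variant is handled identically
// here except that IsDynamic is forwarded to emitBuiltinObjectSize, which
// allows a non-constant result.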
3565 case Builtin::BI__builtin_prefetch: {
3566 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3567 // FIXME: Technically these constants should be of type 'int', yes?
3568 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3569 llvm::ConstantInt::get(Int32Ty, 0);
3570 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3571 llvm::ConstantInt::get(Int32Ty, 3);
3572 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3573 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3574 Builder.CreateCall(F, {Address, RW, Locality, Data});
3575 return RValue::get(nullptr);
3576 }
3577 case Builtin::BI__builtin_readcyclecounter: {
3578 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3579 return RValue::get(Builder.CreateCall(F));
3580 }
3581 case Builtin::BI__builtin_readsteadycounter: {
3582 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3583 return RValue::get(Builder.CreateCall(F));
3584 }
3585 case Builtin::BI__builtin___clear_cache: {
3586 Value *Begin = EmitScalarExpr(E->getArg(0));
3587 Value *End = EmitScalarExpr(E->getArg(1));
3588 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3589 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3590 }
3591 case Builtin::BI__builtin_trap:
3592 EmitTrapCall(Intrinsic::trap);
3593 return RValue::get(nullptr);
3594 case Builtin::BI__debugbreak:
3595 EmitTrapCall(Intrinsic::debugtrap);
3596 return RValue::get(nullptr);
3597 case Builtin::BI__builtin_unreachable: {
3598 EmitUnreachable(E->getExprLoc());
3599
3600 // We do need to preserve an insertion point.
3601 EmitBlock(createBasicBlock("unreachable.cont"));
3602
3603 return RValue::get(nullptr);
3604 }
3605
3606 case Builtin::BI__builtin_powi:
3607 case Builtin::BI__builtin_powif:
3608 case Builtin::BI__builtin_powil: {
3609 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3610 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3611
3612 if (Builder.getIsFPConstrained()) {
3613 // FIXME: llvm.powi has 2 mangling types,
3614 // llvm.experimental.constrained.powi has one.
3615 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3616 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3617 Src0->getType());
3618 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3619 }
3620
3621 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3622 { Src0->getType(), Src1->getType() });
3623 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3624 }
3625 case Builtin::BI__builtin_frexpl: {
3626 // Linux PPC will not be adding additional PPCDoubleDouble support.
3627 // Work to switch the default to IEEE long double is in progress; emit a
3628 // libcall for frexpl instead of legalizing this type in the backend.
3629 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3630 break;
3631 [[fallthrough]];
3632 }
3633 case Builtin::BI__builtin_frexp:
3634 case Builtin::BI__builtin_frexpf:
3635 case Builtin::BI__builtin_frexpf128:
3636 case Builtin::BI__builtin_frexpf16:
3637 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3638 case Builtin::BI__builtin_isgreater:
3639 case Builtin::BI__builtin_isgreaterequal:
3640 case Builtin::BI__builtin_isless:
3641 case Builtin::BI__builtin_islessequal:
3642 case Builtin::BI__builtin_islessgreater:
3643 case Builtin::BI__builtin_isunordered: {
3644 // Ordered comparisons: we know the arguments to these are matching scalar
3645 // floating point values.
3646 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3647 Value *LHS = EmitScalarExpr(E->getArg(0));
3648 Value *RHS = EmitScalarExpr(E->getArg(1));
3649
3650 switch (BuiltinID) {
3651 default: llvm_unreachable("Unknown ordered comparison");
3652 case Builtin::BI__builtin_isgreater:
3653 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3654 break;
3655 case Builtin::BI__builtin_isgreaterequal:
3656 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3657 break;
3658 case Builtin::BI__builtin_isless:
3659 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3660 break;
3661 case Builtin::BI__builtin_islessequal:
3662 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3663 break;
3664 case Builtin::BI__builtin_islessgreater:
3665 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3666 break;
3667 case Builtin::BI__builtin_isunordered:
3668 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3669 break;
3670 }
3671 // ZExt bool to int type.
3672 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3673 }
3674
3675 case Builtin::BI__builtin_isnan: {
3676 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3677 Value *V = EmitScalarExpr(E->getArg(0));
3678 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3679 return RValue::get(Result);
3680 return RValue::get(
3681 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3682 ConvertType(E->getType())));
3683 }
3684
3685 case Builtin::BI__builtin_issignaling: {
3686 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3687 Value *V = EmitScalarExpr(E->getArg(0));
3688 return RValue::get(
3689 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3690 ConvertType(E->getType())));
3691 }
3692
3693 case Builtin::BI__builtin_isinf: {
3694 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3695 Value *V = EmitScalarExpr(E->getArg(0));
3696 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3697 return RValue::get(Result);
3698 return RValue::get(
3699 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3700 ConvertType(E->getType())));
3701 }
3702
3703 case Builtin::BIfinite:
3704 case Builtin::BI__finite:
3705 case Builtin::BIfinitef:
3706 case Builtin::BI__finitef:
3707 case Builtin::BIfinitel:
3708 case Builtin::BI__finitel:
3709 case Builtin::BI__builtin_isfinite: {
3710 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3711 Value *V = EmitScalarExpr(E->getArg(0));
3712 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3713 return RValue::get(Result);
3714 return RValue::get(
3715 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3716 ConvertType(E->getType())));
3717 }
3718
3719 case Builtin::BI__builtin_isnormal: {
3720 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3721 Value *V = EmitScalarExpr(E->getArg(0));
3722 return RValue::get(
3723 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3724 ConvertType(E->getType())));
3725 }
3726
3727 case Builtin::BI__builtin_issubnormal: {
3728 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3729 Value *V = EmitScalarExpr(E->getArg(0));
3730 return RValue::get(
3731 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3732 ConvertType(E->getType())));
3733 }
3734
3735 case Builtin::BI__builtin_iszero: {
3736 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3737 Value *V = EmitScalarExpr(E->getArg(0));
3738 return RValue::get(
3739 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3740 ConvertType(E->getType())));
3741 }
3742
3743 case Builtin::BI__builtin_isfpclass: {
3744 Expr::EvalResult Result;
3745 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3746 break;
3747 uint64_t Test = Result.Val.getInt().getLimitedValue();
3748 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3749 Value *V = EmitScalarExpr(E->getArg(0));
3750 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3751 ConvertType(E->getType())));
3752 }
3753
3754 case Builtin::BI__builtin_nondeterministic_value: {
3755 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3756
3757 Value *Result = PoisonValue::get(Ty);
3758 Result = Builder.CreateFreeze(Result);
3759
3760 return RValue::get(Result);
3761 }
3762
3763 case Builtin::BI__builtin_elementwise_abs: {
3764 Value *Result;
3765 QualType QT = E->getArg(0)->getType();
3766
3767 if (auto *VecTy = QT->getAs<VectorType>())
3768 QT = VecTy->getElementType();
3769 if (QT->isIntegerType())
3770 Result = Builder.CreateBinaryIntrinsic(
3771 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3772 Builder.getFalse(), nullptr, "elt.abs");
3773 else
3774 Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3775
3776 return RValue::get(Result);
3777 }
3778
3779 case Builtin::BI__builtin_elementwise_ceil:
3780 return RValue::get(
3781 emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3782 case Builtin::BI__builtin_elementwise_exp:
3783 return RValue::get(
3784 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3785 case Builtin::BI__builtin_elementwise_exp2:
3786 return RValue::get(
3787 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3788 case Builtin::BI__builtin_elementwise_log:
3789 return RValue::get(
3790 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3791 case Builtin::BI__builtin_elementwise_log2:
3792 return RValue::get(
3793 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3794 case Builtin::BI__builtin_elementwise_log10:
3795 return RValue::get(
3796 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3797 case Builtin::BI__builtin_elementwise_pow: {
3798 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3799 }
3800 case Builtin::BI__builtin_elementwise_bitreverse:
3801 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3802 "elt.bitreverse"));
3803 case Builtin::BI__builtin_elementwise_cos:
3804 return RValue::get(
3805 emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3806 case Builtin::BI__builtin_elementwise_floor:
3807 return RValue::get(
3808 emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3809 case Builtin::BI__builtin_elementwise_roundeven:
3810 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3811 "elt.roundeven"));
3812 case Builtin::BI__builtin_elementwise_round:
3813 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3814 "elt.round"));
3815 case Builtin::BI__builtin_elementwise_rint:
3816 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3817 "elt.rint"));
3818 case Builtin::BI__builtin_elementwise_nearbyint:
3819 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3820 "elt.nearbyint"));
3821 case Builtin::BI__builtin_elementwise_sin:
3822 return RValue::get(
3823 emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3824
3825 case Builtin::BI__builtin_elementwise_trunc:
3826 return RValue::get(
3827 emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3828 case Builtin::BI__builtin_elementwise_canonicalize:
3829 return RValue::get(
3830 emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3831 case Builtin::BI__builtin_elementwise_copysign:
3832 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3833 case Builtin::BI__builtin_elementwise_fma:
3834 return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3835 case Builtin::BI__builtin_elementwise_add_sat:
3836 case Builtin::BI__builtin_elementwise_sub_sat: {
3837 Value *Op0 = EmitScalarExpr(E->getArg(0));
3838 Value *Op1 = EmitScalarExpr(E->getArg(1));
3839 Value *Result;
3840 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3841 QualType Ty = E->getArg(0)->getType();
3842 if (auto *VecTy = Ty->getAs<VectorType>())
3843 Ty = VecTy->getElementType();
3844 bool IsSigned = Ty->isSignedIntegerType();
3845 unsigned Opc;
3846 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3847 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3848 else
3849 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3850 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3851 return RValue::get(Result);
3852 }
3853
3854 case Builtin::BI__builtin_elementwise_max: {
3855 Value *Op0 = EmitScalarExpr(E->getArg(0));
3856 Value *Op1 = EmitScalarExpr(E->getArg(1));
3857 Value *Result;
3858 if (Op0->getType()->isIntOrIntVectorTy()) {
3859 QualType Ty = E->getArg(0)->getType();
3860 if (auto *VecTy = Ty->getAs<VectorType>())
3861 Ty = VecTy->getElementType();
3862 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3863 ? llvm::Intrinsic::smax
3864 : llvm::Intrinsic::umax,
3865 Op0, Op1, nullptr, "elt.max");
3866 } else
3867 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3868 return RValue::get(Result);
3869 }
3870 case Builtin::BI__builtin_elementwise_min: {
3871 Value *Op0 = EmitScalarExpr(E->getArg(0));
3872 Value *Op1 = EmitScalarExpr(E->getArg(1));
3873 Value *Result;
3874 if (Op0->getType()->isIntOrIntVectorTy()) {
3875 QualType Ty = E->getArg(0)->getType();
3876 if (auto *VecTy = Ty->getAs<VectorType>())
3877 Ty = VecTy->getElementType();
3878 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3879 ? llvm::Intrinsic::smin
3880 : llvm::Intrinsic::umin,
3881 Op0, Op1, nullptr, "elt.min");
3882 } else
3883 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3884 return RValue::get(Result);
3885 }
3886
3887 case Builtin::BI__builtin_reduce_max: {
3888 auto GetIntrinsicID = [](QualType QT) {
3889 if (auto *VecTy = QT->getAs<VectorType>())
3890 QT = VecTy->getElementType();
3891 if (QT->isSignedIntegerType())
3892 return llvm::Intrinsic::vector_reduce_smax;
3893 if (QT->isUnsignedIntegerType())
3894 return llvm::Intrinsic::vector_reduce_umax;
3895 assert(QT->isFloatingType() && "must have a float here");
3896 return llvm::Intrinsic::vector_reduce_fmax;
3897 };
3898 return RValue::get(emitUnaryBuiltin(
3899 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3900 }
3901
3902 case Builtin::BI__builtin_reduce_min: {
3903 auto GetIntrinsicID = [](QualType QT) {
3904 if (auto *VecTy = QT->getAs<VectorType>())
3905 QT = VecTy->getElementType();
3906 if (QT->isSignedIntegerType())
3907 return llvm::Intrinsic::vector_reduce_smin;
3908 if (QT->isUnsignedIntegerType())
3909 return llvm::Intrinsic::vector_reduce_umin;
3910 assert(QT->isFloatingType() && "must have a float here");
3911 return llvm::Intrinsic::vector_reduce_fmin;
3912 };
3913
3914 return RValue::get(emitUnaryBuiltin(
3915 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3916 }
3917
3918 case Builtin::BI__builtin_reduce_add:
3919 return RValue::get(emitUnaryBuiltin(
3920 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3921 case Builtin::BI__builtin_reduce_mul:
3922 return RValue::get(emitUnaryBuiltin(
3923 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3924 case Builtin::BI__builtin_reduce_xor:
3925 return RValue::get(emitUnaryBuiltin(
3926 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3927 case Builtin::BI__builtin_reduce_or:
3928 return RValue::get(emitUnaryBuiltin(
3929 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3930 case Builtin::BI__builtin_reduce_and:
3931 return RValue::get(emitUnaryBuiltin(
3932 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3933
3934 case Builtin::BI__builtin_matrix_transpose: {
3935 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3936 Value *MatValue = EmitScalarExpr(E->getArg(0));
3937 MatrixBuilder MB(Builder);
3938 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3939 MatrixTy->getNumColumns());
3940 return RValue::get(Result);
3941 }
3942
3943 case Builtin::BI__builtin_matrix_column_major_load: {
3944 MatrixBuilder MB(Builder);
3945 // Emit everything that isn't dependent on the first parameter type
3946 Value *Stride = EmitScalarExpr(E->getArg(3));
3947 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3948 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3949 assert(PtrTy && "arg0 must be of pointer type");
3950 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3951
3952 Address Src = EmitPointerWithAlignment(E->getArg(0));
3953 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
3954 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
3955 0);
3956 Value *Result = MB.CreateColumnMajorLoad(
3957 Src.getElementType(), Src.emitRawPointer(*this),
3958 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3959 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
3960 return RValue::get(Result);
3961 }
3962
3963 case Builtin::BI__builtin_matrix_column_major_store: {
3964 MatrixBuilder MB(Builder);
3965 Value *Matrix = EmitScalarExpr(E->getArg(0));
3966 Address Dst = EmitPointerWithAlignment(E->getArg(1));
3967 Value *Stride = EmitScalarExpr(E->getArg(2));
3968
3969 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3970 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3971 assert(PtrTy && "arg1 must be of pointer type");
3972 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3973
3974 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
3975 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
3976 0);
3977 Value *Result = MB.CreateColumnMajorStore(
3978 Matrix, Dst.emitRawPointer(*this),
3979 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
3980 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3981 return RValue::get(Result);
3982 }
3983
3984 case Builtin::BI__builtin_isinf_sign: {
3985 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3986 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3987 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3988 Value *Arg = EmitScalarExpr(E->getArg(0));
3989 Value *AbsArg = EmitFAbs(*this, Arg);
3990 Value *IsInf = Builder.CreateFCmpOEQ(
3991 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3992 Value *IsNeg = EmitSignBit(*this, Arg);
3993
3994 llvm::Type *IntTy = ConvertType(E->getType());
3995 Value *Zero = Constant::getNullValue(IntTy);
3996 Value *One = ConstantInt::get(IntTy, 1);
3997 Value *NegativeOne = ConstantInt::get(IntTy, -1);
3998 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3999 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4000 return RValue::get(Result);
4001 }
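// Illustrative note (not from the upstream source): following the expansion
// above, __builtin_isinf_sign(-INFINITY) == -1,
// __builtin_isinf_sign(INFINITY) == 1, and any finite or NaN input gives 0.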
4002
4003 case Builtin::BI__builtin_flt_rounds: {
4004 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4005
4006 llvm::Type *ResultType = ConvertType(E->getType());
4007 Value *Result = Builder.CreateCall(F);
4008 if (Result->getType() != ResultType)
4009 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4010 "cast");
4011 return RValue::get(Result);
4012 }
4013
4014 case Builtin::BI__builtin_set_flt_rounds: {
4015 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4016
4017 Value *V = EmitScalarExpr(E->getArg(0));
4018 Builder.CreateCall(F, V);
4019 return RValue::get(nullptr);
4020 }
4021
4022 case Builtin::BI__builtin_fpclassify: {
4023 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4024 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4025 Value *V = EmitScalarExpr(E->getArg(5));
4026 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4027
4028 // Create Result
4029 BasicBlock *Begin = Builder.GetInsertBlock();
4030 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4031 Builder.SetInsertPoint(End);
4032 PHINode *Result =
4033 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4034 "fpclassify_result");
4035
4036 // if (V==0) return FP_ZERO
4037 Builder.SetInsertPoint(Begin);
4038 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4039 "iszero");
4040 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4041 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4042 Builder.CreateCondBr(IsZero, End, NotZero);
4043 Result->addIncoming(ZeroLiteral, Begin);
4044
4045 // if (V != V) return FP_NAN
4046 Builder.SetInsertPoint(NotZero);
4047 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4048 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4049 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4050 Builder.CreateCondBr(IsNan, End, NotNan);
4051 Result->addIncoming(NanLiteral, NotZero);
4052
4053 // if (fabs(V) == infinity) return FP_INFINITY
4054 Builder.SetInsertPoint(NotNan);
4055 Value *VAbs = EmitFAbs(*this, V);
4056 Value *IsInf =
4057 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4058 "isinf");
4059 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4060 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4061 Builder.CreateCondBr(IsInf, End, NotInf);
4062 Result->addIncoming(InfLiteral, NotNan);
4063
4064 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4065 Builder.SetInsertPoint(NotInf);
4066 APFloat Smallest = APFloat::getSmallestNormalized(
4067 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4068 Value *IsNormal =
4069 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4070 "isnormal");
4071 Value *NormalResult =
4072 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4073 EmitScalarExpr(E->getArg(3)));
4074 Builder.CreateBr(End);
4075 Result->addIncoming(NormalResult, NotInf);
4076
4077 // return Result
4078 Builder.SetInsertPoint(End);
4079 return RValue::get(Result);
4080 }
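// Illustrative note (not from the upstream source): the argument order
// matches the checks above, i.e.
// __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
//                      FP_ZERO, x), with the value to classify passed last.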
4081
4082 // An alloca will always return a pointer to the alloca (stack) address
4083 // space. This address space need not be the same as the AST / Language
4084 // default (e.g. in C / C++ auto vars are in the generic address space). At
4085 // the AST level this is handled within CreateTempAlloca et al., but for the
4086 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4087 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4088 case Builtin::BIalloca:
4089 case Builtin::BI_alloca:
4090 case Builtin::BI__builtin_alloca_uninitialized:
4091 case Builtin::BI__builtin_alloca: {
4092 Value *Size = EmitScalarExpr(E->getArg(0));
4093 const TargetInfo &TI = getContext().getTargetInfo();
4094 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4095 const Align SuitableAlignmentInBytes =
4096 CGM.getContext()
4097 .toCharUnitsFromBits(TI.getSuitableAlign())
4098 .getAsAlign();
4099 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4100 AI->setAlignment(SuitableAlignmentInBytes);
4101 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4102 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4103 LangAS AAS = getASTAllocaAddressSpace();
4104 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4105 if (AAS != EAS) {
4106 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4107 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4108 EAS, Ty));
4109 }
4110 return RValue::get(AI);
4111 }
4112
4113 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4114 case Builtin::BI__builtin_alloca_with_align: {
4115 Value *Size = EmitScalarExpr(E->getArg(0));
4116 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4117 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4118 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4119 const Align AlignmentInBytes =
4120 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4121 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4122 AI->setAlignment(AlignmentInBytes);
4123 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4124 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4125 LangAS AAS = getASTAllocaAddressSpace();
4126 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4127 if (AAS != EAS) {
4128 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4129 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4130 EAS, Ty));
4131 }
4132 return RValue::get(AI);
4133 }
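// Illustrative note (not from the upstream source): as the
// toCharUnitsFromBits conversion above shows, the alignment operand of
// __builtin_alloca_with_align is expressed in bits, so
// __builtin_alloca_with_align(n, 256) produces a 32-byte-aligned allocation.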
4134
4135 case Builtin::BIbzero:
4136 case Builtin::BI__builtin_bzero: {
4137 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4138 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4139 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4140 E->getArg(0)->getExprLoc(), FD, 0);
4141 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4142 return RValue::get(nullptr);
4143 }
4144
4145 case Builtin::BIbcopy:
4146 case Builtin::BI__builtin_bcopy: {
4147 Address Src = EmitPointerWithAlignment(E->getArg(0));
4148 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4149 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4150 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4151 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4152 0);
4153 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4154 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4155 0);
4156 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4157 return RValue::get(nullptr);
4158 }
4159
4160 case Builtin::BImemcpy:
4161 case Builtin::BI__builtin_memcpy:
4162 case Builtin::BImempcpy:
4163 case Builtin::BI__builtin_mempcpy: {
4164 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4165 Address Src = EmitPointerWithAlignment(E->getArg(1));
4166 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4167 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4168 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4169 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4170 if (BuiltinID == Builtin::BImempcpy ||
4171 BuiltinID == Builtin::BI__builtin_mempcpy)
4172 return RValue::get(Builder.CreateInBoundsGEP(
4173 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4174 else
4175 return RValue::get(Dest, *this);
4176 }
4177
4178 case Builtin::BI__builtin_memcpy_inline: {
4179 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4180 Address Src = EmitPointerWithAlignment(E->getArg(1));
4181 uint64_t Size =
4182 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4183 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4184 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4185 Builder.CreateMemCpyInline(Dest, Src, Size);
4186 return RValue::get(nullptr);
4187 }
4188
4189 case Builtin::BI__builtin_char_memchr:
4190 BuiltinID = Builtin::BI__builtin_memchr;
4191 break;
4192
4193 case Builtin::BI__builtin___memcpy_chk: {
4194 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4195 Expr::EvalResult SizeResult, DstSizeResult;
4196 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4197 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4198 break;
4199 llvm::APSInt Size = SizeResult.Val.getInt();
4200 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4201 if (Size.ugt(DstSize))
4202 break;
4203 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4204 Address Src = EmitPointerWithAlignment(E->getArg(1));
4205 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4206 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4207 return RValue::get(Dest, *this);
4208 }
4209
4210 case Builtin::BI__builtin_objc_memmove_collectable: {
4211 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4212 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4213 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4214 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4215 DestAddr, SrcAddr, SizeVal);
4216 return RValue::get(DestAddr, *this);
4217 }
4218
4219 case Builtin::BI__builtin___memmove_chk: {
4220 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4221 Expr::EvalResult SizeResult, DstSizeResult;
4222 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4223 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4224 break;
4225 llvm::APSInt Size = SizeResult.Val.getInt();
4226 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4227 if (Size.ugt(DstSize))
4228 break;
4229 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4230 Address Src = EmitPointerWithAlignment(E->getArg(1));
4231 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4232 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4233 return RValue::get(Dest, *this);
4234 }
4235
4236 case Builtin::BImemmove:
4237 case Builtin::BI__builtin_memmove: {
4238 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4239 Address Src = EmitPointerWithAlignment(E->getArg(1));
4240 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4241 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4242 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4243 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4244 return RValue::get(Dest, *this);
4245 }
4246 case Builtin::BImemset:
4247 case Builtin::BI__builtin_memset: {
4248 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4249 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4250 Builder.getInt8Ty());
4251 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4252 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4253 E->getArg(0)->getExprLoc(), FD, 0);
4254 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4255 return RValue::get(Dest, *this);
4256 }
4257 case Builtin::BI__builtin_memset_inline: {
4258 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4259 Value *ByteVal =
4260 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4261 uint64_t Size =
4262 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4263 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4264 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4265 0);
4266 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4267 return RValue::get(nullptr);
4268 }
4269 case Builtin::BI__builtin___memset_chk: {
4270 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4271 Expr::EvalResult SizeResult, DstSizeResult;
4272 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4273 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4274 break;
4275 llvm::APSInt Size = SizeResult.Val.getInt();
4276 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4277 if (Size.ugt(DstSize))
4278 break;
4279 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4280 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4281 Builder.getInt8Ty());
4282 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4283 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4284 return RValue::get(Dest, *this);
4285 }
4286 case Builtin::BI__builtin_wmemchr: {
4287 // The MSVC runtime library does not provide a definition of wmemchr, so we
4288 // need an inline implementation.
4289 if (!getTarget().getTriple().isOSMSVCRT())
4290 break;
4291
4292 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4293 Value *Str = EmitScalarExpr(E->getArg(0));
4294 Value *Chr = EmitScalarExpr(E->getArg(1));
4295 Value *Size = EmitScalarExpr(E->getArg(2));
4296
4297 BasicBlock *Entry = Builder.GetInsertBlock();
4298 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4299 BasicBlock *Next = createBasicBlock("wmemchr.next");
4300 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4301 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4302 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4303
4304 EmitBlock(CmpEq);
4305 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4306 StrPhi->addIncoming(Str, Entry);
4307 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4308 SizePhi->addIncoming(Size, Entry);
4309 CharUnits WCharAlign =
4310 getContext().getTypeAlignInChars(getContext().WCharTy);
4311 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4312 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4313 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4314 Builder.CreateCondBr(StrEqChr, Exit, Next);
4315
4316 EmitBlock(Next);
4317 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4318 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4319 Value *NextSizeEq0 =
4320 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4321 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4322 StrPhi->addIncoming(NextStr, Next);
4323 SizePhi->addIncoming(NextSize, Next);
4324
4325 EmitBlock(Exit);
4326 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4327 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4328 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4329 Ret->addIncoming(FoundChr, CmpEq);
4330 return RValue::get(Ret);
4331 }
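// Illustrative note (not from the upstream source): the blocks above
// open-code roughly
//   for (; size; --size, ++str)
//     if (*str == chr)
//       return (wchar_t *)str;
//   return nullptr;
// as a PHI-based loop, since the MSVC CRT provides no wmemchr to call.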
4332 case Builtin::BI__builtin_wmemcmp: {
4333 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4334 // need an inline implementation.
4335 if (!getTarget().getTriple().isOSMSVCRT())
4336 break;
4337
4338 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4339
4340 Value *Dst = EmitScalarExpr(E->getArg(0));
4341 Value *Src = EmitScalarExpr(E->getArg(1));
4342 Value *Size = EmitScalarExpr(E->getArg(2));
4343
4344 BasicBlock *Entry = Builder.GetInsertBlock();
4345 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4346 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4347 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4348 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4349 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4350 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4351
4352 EmitBlock(CmpGT);
4353 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4354 DstPhi->addIncoming(Dst, Entry);
4355 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4356 SrcPhi->addIncoming(Src, Entry);
4357 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4358 SizePhi->addIncoming(Size, Entry);
4359 CharUnits WCharAlign =
4360 getContext().getTypeAlignInChars(getContext().WCharTy);
4361 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4362 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4363 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4364 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4365
4366 EmitBlock(CmpLT);
4367 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4368 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4369
4370 EmitBlock(Next);
4371 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4372 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4373 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4374 Value *NextSizeEq0 =
4375 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4376 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4377 DstPhi->addIncoming(NextDst, Next);
4378 SrcPhi->addIncoming(NextSrc, Next);
4379 SizePhi->addIncoming(NextSize, Next);
4380
4381 EmitBlock(Exit);
4382 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4383 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4384 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4385 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4386 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4387 return RValue::get(Ret);
4388 }
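// Illustrative note (not from the upstream source): the same open-coding
// strategy as wmemchr above, this time an element-wise unsigned comparison
// loop, roughly
//   for (; size; --size, ++dst, ++src) {
//     if (*dst > *src) return 1;
//     if (*dst < *src) return -1;
//   }
//   return 0;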
4389 case Builtin::BI__builtin_dwarf_cfa: {
4390 // The offset in bytes from the first argument to the CFA.
4391 //
4392 // Why on earth is this in the frontend? Is there any reason at
4393 // all that the backend can't reasonably determine this while
4394 // lowering llvm.eh.dwarf.cfa()?
4395 //
4396 // TODO: If there's a satisfactory reason, add a target hook for
4397 // this instead of hard-coding 0, which is correct for most targets.
4398 int32_t Offset = 0;
4399
4400 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4401 return RValue::get(Builder.CreateCall(F,
4402 llvm::ConstantInt::get(Int32Ty, Offset)));
4403 }
4404 case Builtin::BI__builtin_return_address: {
4405 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4406 getContext().UnsignedIntTy);
4407 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4408 return RValue::get(Builder.CreateCall(F, Depth));
4409 }
4410 case Builtin::BI_ReturnAddress: {
4411 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4412 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4413 }
4414 case Builtin::BI__builtin_frame_address: {
4415 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4416 getContext().UnsignedIntTy);
4417 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4418 return RValue::get(Builder.CreateCall(F, Depth));
4419 }
4420 case Builtin::BI__builtin_extract_return_addr: {
4421 Value *Address = EmitScalarExpr(E->getArg(0));
4422 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4423 return RValue::get(Result);
4424 }
4425 case Builtin::BI__builtin_frob_return_addr: {
4426 Value *Address = EmitScalarExpr(E->getArg(0));
4427 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4428 return RValue::get(Result);
4429 }
4430 case Builtin::BI__builtin_dwarf_sp_column: {
4431 llvm::IntegerType *Ty
4432 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4433 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4434 if (Column == -1) {
4435 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4436 return RValue::get(llvm::UndefValue::get(Ty));
4437 }
4438 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4439 }
4440 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4441 Value *Address = EmitScalarExpr(E->getArg(0));
4442 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4443 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4444 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4445 }
4446 case Builtin::BI__builtin_eh_return: {
4447 Value *Int = EmitScalarExpr(E->getArg(0));
4448 Value *Ptr = EmitScalarExpr(E->getArg(1));
4449
4450 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4451 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4452 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4453 Function *F =
4454 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4455 : Intrinsic::eh_return_i64);
4456 Builder.CreateCall(F, {Int, Ptr});
4457 Builder.CreateUnreachable();
4458
4459 // We do need to preserve an insertion point.
4460 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4461
4462 return RValue::get(nullptr);
4463 }
4464 case Builtin::BI__builtin_unwind_init: {
4465 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4466 Builder.CreateCall(F);
4467 return RValue::get(nullptr);
4468 }
4469 case Builtin::BI__builtin_extend_pointer: {
4470 // Extends a pointer to the size of an _Unwind_Word, which is
4471 // uint64_t on all platforms. Generally this gets poked into a
4472 // register and eventually used as an address, so if the
4473 // addressing registers are wider than pointers and the platform
4474 // doesn't implicitly ignore high-order bits when doing
4475 // addressing, we need to make sure we zext / sext based on
4476 // the platform's expectations.
4477 //
4478 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4479
4480 // Cast the pointer to intptr_t.
4481 Value *Ptr = EmitScalarExpr(E->getArg(0));
4482 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4483
4484 // If that's 64 bits, we're done.
4485 if (IntPtrTy->getBitWidth() == 64)
4486 return RValue::get(Result);
4487
4488 // Otherwise, ask the codegen data what to do.
4489 if (getTargetHooks().extendPointerWithSExt())
4490 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4491 else
4492 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4493 }
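// Illustrative sketch (not part of the upstream file): on a 32-bit target
// whose _Unwind_Word is 64 bits,
//   unsigned long long w = __builtin_extend_pointer(p);   // void *p
// is emitted roughly as
//   %1 = ptrtoint ptr %p to i32
//   %2 = zext i32 %1 to i64    ; or sext, if the target hook requests it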
4494 case Builtin::BI__builtin_setjmp: {
4495 // Buffer is a void**.
4496 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4497
4498 // Store the frame pointer to the setjmp buffer.
4499 Value *FrameAddr = Builder.CreateCall(
4500 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4501 ConstantInt::get(Int32Ty, 0));
4502 Builder.CreateStore(FrameAddr, Buf);
4503
4504 // Store the stack pointer to the setjmp buffer.
4505 Value *StackAddr = Builder.CreateStackSave();
4506 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4507
4508 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4509 Builder.CreateStore(StackAddr, StackSaveSlot);
4510
4511 // Call LLVM's EH setjmp, which is lightweight.
4512 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4513 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4514 }
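// Sketch of the buffer use above (assuming the usual LLVM SjLj convention):
// slot 0 receives the frame address, slot 2 the saved stack pointer, and the
// remaining slots are left for llvm.eh.sjlj.setjmp / the back end to fill in
// (e.g. the resume address) before a matching __builtin_longjmp.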
4515 case Builtin::BI__builtin_longjmp: {
4516 Value *Buf = EmitScalarExpr(E->getArg(0));
4517
4518 // Call LLVM's EH longjmp, which is lightweight.
4519 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4520
4521 // longjmp doesn't return; mark this as unreachable.
4522 Builder.CreateUnreachable();
4523
4524 // We do need to preserve an insertion point.
4525 EmitBlock(createBasicBlock("longjmp.cont"));
4526
4527 return RValue::get(nullptr);
4528 }
4529 case Builtin::BI__builtin_launder: {
4530 const Expr *Arg = E->getArg(0);
4531 QualType ArgTy = Arg->getType()->getPointeeType();
4532 Value *Ptr = EmitScalarExpr(Arg);
4533 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4534 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4535
4536 return RValue::get(Ptr);
4537 }
4538 case Builtin::BI__sync_fetch_and_add:
4539 case Builtin::BI__sync_fetch_and_sub:
4540 case Builtin::BI__sync_fetch_and_or:
4541 case Builtin::BI__sync_fetch_and_and:
4542 case Builtin::BI__sync_fetch_and_xor:
4543 case Builtin::BI__sync_fetch_and_nand:
4544 case Builtin::BI__sync_add_and_fetch:
4545 case Builtin::BI__sync_sub_and_fetch:
4546 case Builtin::BI__sync_and_and_fetch:
4547 case Builtin::BI__sync_or_and_fetch:
4548 case Builtin::BI__sync_xor_and_fetch:
4549 case Builtin::BI__sync_nand_and_fetch:
4550 case Builtin::BI__sync_val_compare_and_swap:
4551 case Builtin::BI__sync_bool_compare_and_swap:
4552 case Builtin::BI__sync_lock_test_and_set:
4553 case Builtin::BI__sync_lock_release:
4554 case Builtin::BI__sync_swap:
4555 llvm_unreachable("Shouldn't make it through sema");
4556 case Builtin::BI__sync_fetch_and_add_1:
4557 case Builtin::BI__sync_fetch_and_add_2:
4558 case Builtin::BI__sync_fetch_and_add_4:
4559 case Builtin::BI__sync_fetch_and_add_8:
4560 case Builtin::BI__sync_fetch_and_add_16:
4561 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4562 case Builtin::BI__sync_fetch_and_sub_1:
4563 case Builtin::BI__sync_fetch_and_sub_2:
4564 case Builtin::BI__sync_fetch_and_sub_4:
4565 case Builtin::BI__sync_fetch_and_sub_8:
4566 case Builtin::BI__sync_fetch_and_sub_16:
4567 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4568 case Builtin::BI__sync_fetch_and_or_1:
4569 case Builtin::BI__sync_fetch_and_or_2:
4570 case Builtin::BI__sync_fetch_and_or_4:
4571 case Builtin::BI__sync_fetch_and_or_8:
4572 case Builtin::BI__sync_fetch_and_or_16:
4573 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4574 case Builtin::BI__sync_fetch_and_and_1:
4575 case Builtin::BI__sync_fetch_and_and_2:
4576 case Builtin::BI__sync_fetch_and_and_4:
4577 case Builtin::BI__sync_fetch_and_and_8:
4578 case Builtin::BI__sync_fetch_and_and_16:
4579 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4580 case Builtin::BI__sync_fetch_and_xor_1:
4581 case Builtin::BI__sync_fetch_and_xor_2:
4582 case Builtin::BI__sync_fetch_and_xor_4:
4583 case Builtin::BI__sync_fetch_and_xor_8:
4584 case Builtin::BI__sync_fetch_and_xor_16:
4585 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4586 case Builtin::BI__sync_fetch_and_nand_1:
4587 case Builtin::BI__sync_fetch_and_nand_2:
4588 case Builtin::BI__sync_fetch_and_nand_4:
4589 case Builtin::BI__sync_fetch_and_nand_8:
4590 case Builtin::BI__sync_fetch_and_nand_16:
4591 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4592
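// Illustrative sketch: for a 32-bit int x,
//   int old = __sync_fetch_and_add(&x, 1);
// is emitted roughly as
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
// i.e. a sequentially consistent read-modify-write returning the prior value.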
4593 // Clang extensions: not overloaded yet.
4594 case Builtin::BI__sync_fetch_and_min:
4595 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4596 case Builtin::BI__sync_fetch_and_max:
4597 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4598 case Builtin::BI__sync_fetch_and_umin:
4599 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4600 case Builtin::BI__sync_fetch_and_umax:
4601 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4602
4603 case Builtin::BI__sync_add_and_fetch_1:
4604 case Builtin::BI__sync_add_and_fetch_2:
4605 case Builtin::BI__sync_add_and_fetch_4:
4606 case Builtin::BI__sync_add_and_fetch_8:
4607 case Builtin::BI__sync_add_and_fetch_16:
4608 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4609 llvm::Instruction::Add);
4610 case Builtin::BI__sync_sub_and_fetch_1:
4611 case Builtin::BI__sync_sub_and_fetch_2:
4612 case Builtin::BI__sync_sub_and_fetch_4:
4613 case Builtin::BI__sync_sub_and_fetch_8:
4614 case Builtin::BI__sync_sub_and_fetch_16:
4615 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4616 llvm::Instruction::Sub);
4617 case Builtin::BI__sync_and_and_fetch_1:
4618 case Builtin::BI__sync_and_and_fetch_2:
4619 case Builtin::BI__sync_and_and_fetch_4:
4620 case Builtin::BI__sync_and_and_fetch_8:
4621 case Builtin::BI__sync_and_and_fetch_16:
4622 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4623 llvm::Instruction::And);
4624 case Builtin::BI__sync_or_and_fetch_1:
4625 case Builtin::BI__sync_or_and_fetch_2:
4626 case Builtin::BI__sync_or_and_fetch_4:
4627 case Builtin::BI__sync_or_and_fetch_8:
4628 case Builtin::BI__sync_or_and_fetch_16:
4629 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4630 llvm::Instruction::Or);
4631 case Builtin::BI__sync_xor_and_fetch_1:
4632 case Builtin::BI__sync_xor_and_fetch_2:
4633 case Builtin::BI__sync_xor_and_fetch_4:
4634 case Builtin::BI__sync_xor_and_fetch_8:
4635 case Builtin::BI__sync_xor_and_fetch_16:
4636 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4637 llvm::Instruction::Xor);
4638 case Builtin::BI__sync_nand_and_fetch_1:
4639 case Builtin::BI__sync_nand_and_fetch_2:
4640 case Builtin::BI__sync_nand_and_fetch_4:
4641 case Builtin::BI__sync_nand_and_fetch_8:
4642 case Builtin::BI__sync_nand_and_fetch_16:
4643 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4644 llvm::Instruction::And, true);
4645
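// Illustrative sketch: the *_and_fetch forms reuse the same atomicrmw and
// then re-apply the operation to compute the new value, e.g.
//   int n = __sync_add_and_fetch(&x, 1);
// becomes roughly
//   %old = atomicrmw add ptr %x, i32 1 seq_cst
//   %n   = add i32 %old, 1
// The extra flag on the nand case inverts that recombined value, since
// __sync_nand_and_fetch returns ~(old & value).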
4646 case Builtin::BI__sync_val_compare_and_swap_1:
4647 case Builtin::BI__sync_val_compare_and_swap_2:
4648 case Builtin::BI__sync_val_compare_and_swap_4:
4649 case Builtin::BI__sync_val_compare_and_swap_8:
4650 case Builtin::BI__sync_val_compare_and_swap_16:
4651 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4652
4653 case Builtin::BI__sync_bool_compare_and_swap_1:
4654 case Builtin::BI__sync_bool_compare_and_swap_2:
4655 case Builtin::BI__sync_bool_compare_and_swap_4:
4656 case Builtin::BI__sync_bool_compare_and_swap_8:
4657 case Builtin::BI__sync_bool_compare_and_swap_16:
4658 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4659
4660 case Builtin::BI__sync_swap_1:
4661 case Builtin::BI__sync_swap_2:
4662 case Builtin::BI__sync_swap_4:
4663 case Builtin::BI__sync_swap_8:
4664 case Builtin::BI__sync_swap_16:
4665 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4666
4667 case Builtin::BI__sync_lock_test_and_set_1:
4668 case Builtin::BI__sync_lock_test_and_set_2:
4669 case Builtin::BI__sync_lock_test_and_set_4:
4670 case Builtin::BI__sync_lock_test_and_set_8:
4671 case Builtin::BI__sync_lock_test_and_set_16:
4672 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4673
4674 case Builtin::BI__sync_lock_release_1:
4675 case Builtin::BI__sync_lock_release_2:
4676 case Builtin::BI__sync_lock_release_4:
4677 case Builtin::BI__sync_lock_release_8:
4678 case Builtin::BI__sync_lock_release_16: {
4679 Address Ptr = CheckAtomicAlignment(*this, E);
4680 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4681
4682 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4683 getContext().getTypeSize(ElTy));
4684 llvm::StoreInst *Store =
4685 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4686 Store->setAtomic(llvm::AtomicOrdering::Release);
4687 return RValue::get(nullptr);
4688 }
4689
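// Illustrative sketch: __sync_lock_release(&flag) on a 32-bit int flag is
// emitted roughly as
//   store atomic i32 0, ptr %flag release, align 4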
4690 case Builtin::BI__sync_synchronize: {
4691 // We assume this is supposed to correspond to a C++0x-style
4692 // sequentially-consistent fence (i.e. this is only usable for
4693 // synchronization, not device I/O or anything like that). This intrinsic
4694 // is really badly designed in the sense that in theory, there isn't
4695 // any way to safely use it... but in practice, it mostly works
4696 // to use it with non-atomic loads and stores to get acquire/release
4697 // semantics.
4698 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4699 return RValue::get(nullptr);
4700 }
4701
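// Illustrative sketch: __sync_synchronize() is emitted as
//   fence seq_cst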
4702 case Builtin::BI__builtin_nontemporal_load:
4703 return RValue::get(EmitNontemporalLoad(*this, E));
4704 case Builtin::BI__builtin_nontemporal_store:
4705 return RValue::get(EmitNontemporalStore(*this, E));
4706 case Builtin::BI__c11_atomic_is_lock_free:
4707 case Builtin::BI__atomic_is_lock_free: {
4708 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4709 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4710 // _Atomic(T) is always properly-aligned.
4711 const char *LibCallName = "__atomic_is_lock_free";
4712 CallArgList Args;
4713 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4714 getContext().getSizeType());
4715 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4716 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4717 getContext().VoidPtrTy);
4718 else
4719 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4720 getContext().VoidPtrTy);
4721 const CGFunctionInfo &FuncInfo =
4722 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4723 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4724 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4725 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4726 ReturnValueSlot(), Args);
4727 }
4728
4729 case Builtin::BI__atomic_test_and_set: {
4730 // Look at the argument type to determine whether this is a volatile
4731 // operation. The parameter type is always volatile.
4732 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4733 bool Volatile =
4734 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4735
4736 Address Ptr =
4737 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4738
4739 Value *NewVal = Builder.getInt8(1);
4740 Value *Order = EmitScalarExpr(E->getArg(1));
4741 if (isa<llvm::ConstantInt>(Order)) {
4742 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4743 AtomicRMWInst *Result = nullptr;
4744 switch (ord) {
4745 case 0: // memory_order_relaxed
4746 default: // invalid order
4747 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4748 llvm::AtomicOrdering::Monotonic);
4749 break;
4750 case 1: // memory_order_consume
4751 case 2: // memory_order_acquire
4752 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4753 llvm::AtomicOrdering::Acquire);
4754 break;
4755 case 3: // memory_order_release
4756 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4757 llvm::AtomicOrdering::Release);
4758 break;
4759 case 4: // memory_order_acq_rel
4760
4761 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4762 llvm::AtomicOrdering::AcquireRelease);
4763 break;
4764 case 5: // memory_order_seq_cst
4765 Result = Builder.CreateAtomicRMW(
4766 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4767 llvm::AtomicOrdering::SequentiallyConsistent);
4768 break;
4769 }
4770 Result->setVolatile(Volatile);
4771 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4772 }
4773
4774 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4775
4776 llvm::BasicBlock *BBs[5] = {
4777 createBasicBlock("monotonic", CurFn),
4778 createBasicBlock("acquire", CurFn),
4779 createBasicBlock("release", CurFn),
4780 createBasicBlock("acqrel", CurFn),
4781 createBasicBlock("seqcst", CurFn)
4782 };
4783 llvm::AtomicOrdering Orders[5] = {
4784 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4785 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4786 llvm::AtomicOrdering::SequentiallyConsistent};
4787
4788 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4789 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4790
4791 Builder.SetInsertPoint(ContBB);
4792 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4793
4794 for (unsigned i = 0; i < 5; ++i) {
4795 Builder.SetInsertPoint(BBs[i]);
4796 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4797 Ptr, NewVal, Orders[i]);
4798 RMW->setVolatile(Volatile);
4799 Result->addIncoming(RMW, BBs[i]);
4800 Builder.CreateBr(ContBB);
4801 }
4802
4803 SI->addCase(Builder.getInt32(0), BBs[0]);
4804 SI->addCase(Builder.getInt32(1), BBs[1]);
4805 SI->addCase(Builder.getInt32(2), BBs[1]);
4806 SI->addCase(Builder.getInt32(3), BBs[2]);
4807 SI->addCase(Builder.getInt32(4), BBs[3]);
4808 SI->addCase(Builder.getInt32(5), BBs[4]);
4809
4810 Builder.SetInsertPoint(ContBB);
4811 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4812 }
4813
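// Illustrative sketch: with a constant ordering,
//   bool b = __atomic_test_and_set(&flag, __ATOMIC_ACQUIRE);
// is emitted roughly as
//   %old = atomicrmw xchg ptr %flag, i8 1 acquire
//   %b   = icmp ne i8 %old, 0
// With a non-constant ordering, the switch over the five blocks above picks
// the ordering at run time.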
4814 case Builtin::BI__atomic_clear: {
4815 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4816 bool Volatile =
4817 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4818
4819 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4820 Ptr = Ptr.withElementType(Int8Ty);
4821 Value *NewVal = Builder.getInt8(0);
4822 Value *Order = EmitScalarExpr(E->getArg(1));
4823 if (isa<llvm::ConstantInt>(Order)) {
4824 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4825 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4826 switch (ord) {
4827 case 0: // memory_order_relaxed
4828 default: // invalid order
4829 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4830 break;
4831 case 3: // memory_order_release
4832 Store->setOrdering(llvm::AtomicOrdering::Release);
4833 break;
4834 case 5: // memory_order_seq_cst
4835 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4836 break;
4837 }
4838 return RValue::get(nullptr);
4839 }
4840
4841 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4842
4843 llvm::BasicBlock *BBs[3] = {
4844 createBasicBlock("monotonic", CurFn),
4845 createBasicBlock("release", CurFn),
4846 createBasicBlock("seqcst", CurFn)
4847 };
4848 llvm::AtomicOrdering Orders[3] = {
4849 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4850 llvm::AtomicOrdering::SequentiallyConsistent};
4851
4852 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4853 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4854
4855 for (unsigned i = 0; i < 3; ++i) {
4856 Builder.SetInsertPoint(BBs[i]);
4857 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4858 Store->setOrdering(Orders[i]);
4859 Builder.CreateBr(ContBB);
4860 }
4861
4862 SI->addCase(Builder.getInt32(0), BBs[0]);
4863 SI->addCase(Builder.getInt32(3), BBs[1]);
4864 SI->addCase(Builder.getInt32(5), BBs[2]);
4865
4866 Builder.SetInsertPoint(ContBB);
4867 return RValue::get(nullptr);
4868 }
4869
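// Illustrative sketch:
//   __atomic_clear(&flag, __ATOMIC_RELEASE);
// is emitted roughly as
//   store atomic i8 0, ptr %flag release, align 1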
4870 case Builtin::BI__atomic_thread_fence:
4871 case Builtin::BI__atomic_signal_fence:
4872 case Builtin::BI__c11_atomic_thread_fence:
4873 case Builtin::BI__c11_atomic_signal_fence: {
4874 llvm::SyncScope::ID SSID;
4875 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4876 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4877 SSID = llvm::SyncScope::SingleThread;
4878 else
4879 SSID = llvm::SyncScope::System;
4880 Value *Order = EmitScalarExpr(E->getArg(0));
4881 if (isa<llvm::ConstantInt>(Order)) {
4882 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4883 switch (ord) {
4884 case 0: // memory_order_relaxed
4885 default: // invalid order
4886 break;
4887 case 1: // memory_order_consume
4888 case 2: // memory_order_acquire
4889 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4890 break;
4891 case 3: // memory_order_release
4892 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4893 break;
4894 case 4: // memory_order_acq_rel
4895 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4896 break;
4897 case 5: // memory_order_seq_cst
4898 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4899 break;
4900 }
4901 return RValue::get(nullptr);
4902 }
4903
4904 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4905 AcquireBB = createBasicBlock("acquire", CurFn);
4906 ReleaseBB = createBasicBlock("release", CurFn);
4907 AcqRelBB = createBasicBlock("acqrel", CurFn);
4908 SeqCstBB = createBasicBlock("seqcst", CurFn);
4909 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4910
4911 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4912 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4913
4914 Builder.SetInsertPoint(AcquireBB);
4915 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4916 Builder.CreateBr(ContBB);
4917 SI->addCase(Builder.getInt32(1), AcquireBB);
4918 SI->addCase(Builder.getInt32(2), AcquireBB);
4919
4920 Builder.SetInsertPoint(ReleaseBB);
4921 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4922 Builder.CreateBr(ContBB);
4923 SI->addCase(Builder.getInt32(3), ReleaseBB);
4924
4925 Builder.SetInsertPoint(AcqRelBB);
4926 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4927 Builder.CreateBr(ContBB);
4928 SI->addCase(Builder.getInt32(4), AcqRelBB);
4929
4930 Builder.SetInsertPoint(SeqCstBB);
4931 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4932 Builder.CreateBr(ContBB);
4933 SI->addCase(Builder.getInt32(5), SeqCstBB);
4934
4935 Builder.SetInsertPoint(ContBB);
4936 return RValue::get(nullptr);
4937 }
4938
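// Illustrative sketch:
//   __atomic_thread_fence(__ATOMIC_ACQ_REL);   // fence acq_rel
//   __atomic_signal_fence(__ATOMIC_SEQ_CST);   // fence syncscope("singlethread") seq_cst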
4939 case Builtin::BI__builtin_signbit:
4940 case Builtin::BI__builtin_signbitf:
4941 case Builtin::BI__builtin_signbitl: {
4942 return RValue::get(
4943 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4944 ConvertType(E->getType())));
4945 }
4946 case Builtin::BI__warn_memset_zero_len:
4947 return RValue::getIgnored();
4948 case Builtin::BI__annotation: {
4949 // Re-encode each wide string to UTF8 and make an MDString.
4950 SmallVector<Metadata *, 1> Strings;
4951 for (const Expr *Arg : E->arguments()) {
4952 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4953 assert(Str->getCharByteWidth() == 2);
4954 StringRef WideBytes = Str->getBytes();
4955 std::string StrUtf8;
4956 if (!convertUTF16ToUTF8String(
4957 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4958 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4959 continue;
4960 }
4961 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4962 }
4963
4964 // Build an MDTuple of MDStrings and emit the intrinsic call.
4965 llvm::Function *F =
4966 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4967 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4968 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4969 return RValue::getIgnored();
4970 }
4971 case Builtin::BI__builtin_annotation: {
4972 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4973 llvm::Function *F =
4974 CGM.getIntrinsic(llvm::Intrinsic::annotation,
4975 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4976
4977 // Get the annotation string, go through casts. Sema requires this to be a
4978 // non-wide string literal, potentially cast, so the cast<> is safe.
4979 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4980 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4981 return RValue::get(
4982 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4983 }
4984 case Builtin::BI__builtin_addcb:
4985 case Builtin::BI__builtin_addcs:
4986 case Builtin::BI__builtin_addc:
4987 case Builtin::BI__builtin_addcl:
4988 case Builtin::BI__builtin_addcll:
4989 case Builtin::BI__builtin_subcb:
4990 case Builtin::BI__builtin_subcs:
4991 case Builtin::BI__builtin_subc:
4992 case Builtin::BI__builtin_subcl:
4993 case Builtin::BI__builtin_subcll: {
4994
4995 // We translate all of these builtins from expressions of the form:
4996 // int x = ..., y = ..., carryin = ..., carryout, result;
4997 // result = __builtin_addc(x, y, carryin, &carryout);
4998 //
4999 // to LLVM IR of the form:
5000 //
5001 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5002 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5003 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5004 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5005 // i32 %carryin)
5006 // %result = extractvalue {i32, i1} %tmp2, 0
5007 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5008 // %tmp3 = or i1 %carry1, %carry2
5009 // %tmp4 = zext i1 %tmp3 to i32
5010 // store i32 %tmp4, i32* %carryout
5011
5012 // Scalarize our inputs.
5013 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5014 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5015 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5016 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5017
5018 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5019 llvm::Intrinsic::ID IntrinsicId;
5020 switch (BuiltinID) {
5021 default: llvm_unreachable("Unknown multiprecision builtin id.");
5022 case Builtin::BI__builtin_addcb:
5023 case Builtin::BI__builtin_addcs:
5024 case Builtin::BI__builtin_addc:
5025 case Builtin::BI__builtin_addcl:
5026 case Builtin::BI__builtin_addcll:
5027 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5028 break;
5029 case Builtin::BI__builtin_subcb:
5030 case Builtin::BI__builtin_subcs:
5031 case Builtin::BI__builtin_subc:
5032 case Builtin::BI__builtin_subcl:
5033 case Builtin::BI__builtin_subcll:
5034 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5035 break;
5036 }
5037
5038 // Construct our resulting LLVM IR expression.
5039 llvm::Value *Carry1;
5040 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5041 X, Y, Carry1);
5042 llvm::Value *Carry2;
5043 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5044 Sum1, Carryin, Carry2);
5045 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5046 X->getType());
5047 Builder.CreateStore(CarryOut, CarryOutPtr);
5048 return RValue::get(Sum2);
5049 }
5050
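// Illustrative usage sketch: chaining the carry builds wider arithmetic from
// 32-bit halves, e.g.
//   unsigned c, lo, hi;
//   lo = __builtin_addc(a_lo, b_lo, 0, &c);
//   hi = __builtin_addc(a_hi, b_hi, c, &c);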
5051 case Builtin::BI__builtin_add_overflow:
5052 case Builtin::BI__builtin_sub_overflow:
5053 case Builtin::BI__builtin_mul_overflow: {
5054 const clang::Expr *LeftArg = E->getArg(0);
5055 const clang::Expr *RightArg = E->getArg(1);
5056 const clang::Expr *ResultArg = E->getArg(2);
5057
5058 clang::QualType ResultQTy =
5059 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5060
5061 WidthAndSignedness LeftInfo =
5062 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5063 WidthAndSignedness RightInfo =
5064 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5065 WidthAndSignedness ResultInfo =
5066 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5067
5068 // Handle mixed-sign multiplication as a special case, because adding
5069 // runtime or backend support for our generic irgen would be too expensive.
5070 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5071 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5072 RightInfo, ResultArg, ResultQTy,
5073 ResultInfo);
5074
5075 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5076 ResultInfo))
5077 return EmitCheckedUnsignedMultiplySignedResult(
5078 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5079 ResultInfo);
5080
5081 WidthAndSignedness EncompassingInfo =
5082 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5083
5084 llvm::Type *EncompassingLLVMTy =
5085 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5086
5087 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5088
5089 llvm::Intrinsic::ID IntrinsicId;
5090 switch (BuiltinID) {
5091 default:
5092 llvm_unreachable("Unknown overflow builtin id.");
5093 case Builtin::BI__builtin_add_overflow:
5094 IntrinsicId = EncompassingInfo.Signed
5095 ? llvm::Intrinsic::sadd_with_overflow
5096 : llvm::Intrinsic::uadd_with_overflow;
5097 break;
5098 case Builtin::BI__builtin_sub_overflow:
5099 IntrinsicId = EncompassingInfo.Signed
5100 ? llvm::Intrinsic::ssub_with_overflow
5101 : llvm::Intrinsic::usub_with_overflow;
5102 break;
5103 case Builtin::BI__builtin_mul_overflow:
5104 IntrinsicId = EncompassingInfo.Signed
5105 ? llvm::Intrinsic::smul_with_overflow
5106 : llvm::Intrinsic::umul_with_overflow;
5107 break;
5108 }
5109
5110 llvm::Value *Left = EmitScalarExpr(LeftArg);
5111 llvm::Value *Right = EmitScalarExpr(RightArg);
5112 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5113
5114 // Extend each operand to the encompassing type.
5115 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5116 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5117
5118 // Perform the operation on the extended values.
5119 llvm::Value *Overflow, *Result;
5120 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5121
5122 if (EncompassingInfo.Width > ResultInfo.Width) {
5123 // The encompassing type is wider than the result type, so we need to
5124 // truncate it.
5125 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5126
5127 // To see if the truncation caused an overflow, we will extend
5128 // the result and then compare it to the original result.
5129 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5130 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5131 llvm::Value *TruncationOverflow =
5132 Builder.CreateICmpNE(Result, ResultTruncExt);
5133
5134 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5135 Result = ResultTrunc;
5136 }
5137
5138 // Finally, store the result using the pointer.
5139 bool isVolatile =
5140 ResultArg->getType()->getPointeeType().isVolatileQualified();
5141 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5142
5143 return RValue::get(Overflow);
5144 }
5145
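// Illustrative sketch of the mixed-type path: for
//   int a; unsigned long long b; short r;
//   bool ovf = __builtin_add_overflow(a, b, &r);
// both operands are sign- or zero-extended (per operand signedness) to the
// encompassing integer type, the matching *.with.overflow intrinsic is
// emitted in that type, the result is truncated to short, and a comparison
// of the re-extended truncation against the wide result is OR'd into the
// overflow flag.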
5146 case Builtin::BI__builtin_uadd_overflow:
5147 case Builtin::BI__builtin_uaddl_overflow:
5148 case Builtin::BI__builtin_uaddll_overflow:
5149 case Builtin::BI__builtin_usub_overflow:
5150 case Builtin::BI__builtin_usubl_overflow:
5151 case Builtin::BI__builtin_usubll_overflow:
5152 case Builtin::BI__builtin_umul_overflow:
5153 case Builtin::BI__builtin_umull_overflow:
5154 case Builtin::BI__builtin_umulll_overflow:
5155 case Builtin::BI__builtin_sadd_overflow:
5156 case Builtin::BI__builtin_saddl_overflow:
5157 case Builtin::BI__builtin_saddll_overflow:
5158 case Builtin::BI__builtin_ssub_overflow:
5159 case Builtin::BI__builtin_ssubl_overflow:
5160 case Builtin::BI__builtin_ssubll_overflow:
5161 case Builtin::BI__builtin_smul_overflow:
5162 case Builtin::BI__builtin_smull_overflow:
5163 case Builtin::BI__builtin_smulll_overflow: {
5164
5165 // We translate all of these builtins directly to the relevant llvm IR node.
5166
5167 // Scalarize our inputs.
5168 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5169 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5170 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5171
5172 // Decide which of the overflow intrinsics we are lowering to:
5173 llvm::Intrinsic::ID IntrinsicId;
5174 switch (BuiltinID) {
5175 default: llvm_unreachable("Unknown overflow builtin id.");
5176 case Builtin::BI__builtin_uadd_overflow:
5177 case Builtin::BI__builtin_uaddl_overflow:
5178 case Builtin::BI__builtin_uaddll_overflow:
5179 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5180 break;
5181 case Builtin::BI__builtin_usub_overflow:
5182 case Builtin::BI__builtin_usubl_overflow:
5183 case Builtin::BI__builtin_usubll_overflow:
5184 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5185 break;
5186 case Builtin::BI__builtin_umul_overflow:
5187 case Builtin::BI__builtin_umull_overflow:
5188 case Builtin::BI__builtin_umulll_overflow:
5189 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5190 break;
5191 case Builtin::BI__builtin_sadd_overflow:
5192 case Builtin::BI__builtin_saddl_overflow:
5193 case Builtin::BI__builtin_saddll_overflow:
5194 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5195 break;
5196 case Builtin::BI__builtin_ssub_overflow:
5197 case Builtin::BI__builtin_ssubl_overflow:
5198 case Builtin::BI__builtin_ssubll_overflow:
5199 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5200 break;
5201 case Builtin::BI__builtin_smul_overflow:
5202 case Builtin::BI__builtin_smull_overflow:
5203 case Builtin::BI__builtin_smulll_overflow:
5204 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5205 break;
5206 }
5207
5208
5209 llvm::Value *Carry;
5210 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5211 Builder.CreateStore(Sum, SumOutPtr);
5212
5213 return RValue::get(Carry);
5214 }
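// Illustrative sketch: for 32-bit ints,
//   bool ovf = __builtin_sadd_overflow(a, b, &r);
// is emitted roughly as
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %pair, 0
//   %ovf  = extractvalue { i32, i1 } %pair, 1
//   store i32 %sum, ptr %r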
5215 case Builtin::BIaddressof:
5216 case Builtin::BI__addressof:
5217 case Builtin::BI__builtin_addressof:
5218 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5219 case Builtin::BI__builtin_function_start:
5220 return RValue::get(CGM.GetFunctionStart(
5221 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5222 case Builtin::BI__builtin_operator_new:
5223 return EmitBuiltinNewDeleteCall(
5224 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5225 case Builtin::BI__builtin_operator_delete:
5226 EmitBuiltinNewDeleteCall(
5227 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5228 return RValue::get(nullptr);
5229
5230 case Builtin::BI__builtin_is_aligned:
5231 return EmitBuiltinIsAligned(E);
5232 case Builtin::BI__builtin_align_up:
5233 return EmitBuiltinAlignTo(E, true);
5234 case Builtin::BI__builtin_align_down:
5235 return EmitBuiltinAlignTo(E, false);
5236
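// Illustrative sketch:
//   __builtin_is_aligned(p, 16)   // roughly ((uintptr_t)p & 15) == 0
//   __builtin_align_up(p, 16)     // roughly ((uintptr_t)p + 15) & ~(uintptr_t)15,
//                                 // rebuilt as a pointer of p's type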
5237 case Builtin::BI__noop:
5238 // __noop always evaluates to an integer literal zero.
5239 return RValue::get(ConstantInt::get(IntTy, 0));
5240 case Builtin::BI__builtin_call_with_static_chain: {
5241 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5242 const Expr *Chain = E->getArg(1);
5243 return EmitCall(Call->getCallee()->getType(),
5244 EmitCallee(Call->getCallee()), Call, ReturnValue,
5245 EmitScalarExpr(Chain));
5246 }
5247 case Builtin::BI_InterlockedExchange8:
5248 case Builtin::BI_InterlockedExchange16:
5249 case Builtin::BI_InterlockedExchange:
5250 case Builtin::BI_InterlockedExchangePointer:
5251 return RValue::get(
5252 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5253 case Builtin::BI_InterlockedCompareExchangePointer:
5254 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5255 llvm::Type *RTy;
5256 llvm::IntegerType *IntType = IntegerType::get(
5257 getLLVMContext(), getContext().getTypeSize(E->getType()));
5258
5259 Address DestAddr = CheckAtomicAlignment(*this, E);
5260
5261 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5262 RTy = Exchange->getType();
5263 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5264
5265 llvm::Value *Comparand =
5266 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5267
5268 auto Ordering =
5269 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5270 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5271
5272 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5273 Ordering, Ordering);
5274 Result->setVolatile(true);
5275
5276 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5277 0),
5278 RTy));
5279 }
5280 case Builtin::BI_InterlockedCompareExchange8:
5281 case Builtin::BI_InterlockedCompareExchange16:
5282 case Builtin::BI_InterlockedCompareExchange:
5283 case Builtin::BI_InterlockedCompareExchange64:
5284 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5285 case Builtin::BI_InterlockedIncrement16:
5286 case Builtin::BI_InterlockedIncrement:
5287 return RValue::get(
5288 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5289 case Builtin::BI_InterlockedDecrement16:
5290 case Builtin::BI_InterlockedDecrement:
5291 return RValue::get(
5292 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5293 case Builtin::BI_InterlockedAnd8:
5294 case Builtin::BI_InterlockedAnd16:
5295 case Builtin::BI_InterlockedAnd:
5296 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5297 case Builtin::BI_InterlockedExchangeAdd8:
5298 case Builtin::BI_InterlockedExchangeAdd16:
5299 case Builtin::BI_InterlockedExchangeAdd:
5300 return RValue::get(
5301 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5302 case Builtin::BI_InterlockedExchangeSub8:
5303 case Builtin::BI_InterlockedExchangeSub16:
5304 case Builtin::BI_InterlockedExchangeSub:
5305 return RValue::get(
5306 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5307 case Builtin::BI_InterlockedOr8:
5308 case Builtin::BI_InterlockedOr16:
5309 case Builtin::BI_InterlockedOr:
5310 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5311 case Builtin::BI_InterlockedXor8:
5312 case Builtin::BI_InterlockedXor16:
5313 case Builtin::BI_InterlockedXor:
5314 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5315
5316 case Builtin::BI_bittest64:
5317 case Builtin::BI_bittest:
5318 case Builtin::BI_bittestandcomplement64:
5319 case Builtin::BI_bittestandcomplement:
5320 case Builtin::BI_bittestandreset64:
5321 case Builtin::BI_bittestandreset:
5322 case Builtin::BI_bittestandset64:
5323 case Builtin::BI_bittestandset:
5324 case Builtin::BI_interlockedbittestandreset:
5325 case Builtin::BI_interlockedbittestandreset64:
5326 case Builtin::BI_interlockedbittestandset64:
5327 case Builtin::BI_interlockedbittestandset:
5328 case Builtin::BI_interlockedbittestandset_acq:
5329 case Builtin::BI_interlockedbittestandset_rel:
5330 case Builtin::BI_interlockedbittestandset_nf:
5331 case Builtin::BI_interlockedbittestandreset_acq:
5332 case Builtin::BI_interlockedbittestandreset_rel:
5333 case Builtin::BI_interlockedbittestandreset_nf:
5334 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5335
5336 // These builtins exist to emit regular volatile loads and stores not
5337 // affected by the -fms-volatile setting.
5338 case Builtin::BI__iso_volatile_load8:
5339 case Builtin::BI__iso_volatile_load16:
5340 case Builtin::BI__iso_volatile_load32:
5341 case Builtin::BI__iso_volatile_load64:
5342 return RValue::get(EmitISOVolatileLoad(*this, E));
5343 case Builtin::BI__iso_volatile_store8:
5344 case Builtin::BI__iso_volatile_store16:
5345 case Builtin::BI__iso_volatile_store32:
5346 case Builtin::BI__iso_volatile_store64:
5347 return RValue::get(EmitISOVolatileStore(*this, E));
5348
5349 case Builtin::BI__builtin_ptrauth_auth:
5350 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5351 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5352 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5353 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5354 case Builtin::BI__builtin_ptrauth_strip: {
5355 // Emit the arguments.
5356 SmallVector<llvm::Value *, 5> Args;
5357 for (auto argExpr : E->arguments())
5358 Args.push_back(EmitScalarExpr(argExpr));
5359
5360 // Cast the value to intptr_t, saving its original type.
5361 llvm::Type *OrigValueType = Args[0]->getType();
5362 if (OrigValueType->isPointerTy())
5363 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5364
5365 switch (BuiltinID) {
5366 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5367 if (Args[4]->getType()->isPointerTy())
5368 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5369 [[fallthrough]];
5370
5371 case Builtin::BI__builtin_ptrauth_auth:
5372 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5373 if (Args[2]->getType()->isPointerTy())
5374 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5375 break;
5376
5377 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5378 if (Args[1]->getType()->isPointerTy())
5379 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5380 break;
5381
5382 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5383 case Builtin::BI__builtin_ptrauth_strip:
5384 break;
5385 }
5386
5387 // Call the intrinsic.
5388 auto IntrinsicID = [&]() -> unsigned {
5389 switch (BuiltinID) {
5390 case Builtin::BI__builtin_ptrauth_auth:
5391 return llvm::Intrinsic::ptrauth_auth;
5392 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5393 return llvm::Intrinsic::ptrauth_resign;
5394 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5395 return llvm::Intrinsic::ptrauth_blend;
5396 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5397 return llvm::Intrinsic::ptrauth_sign_generic;
5398 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5399 return llvm::Intrinsic::ptrauth_sign;
5400 case Builtin::BI__builtin_ptrauth_strip:
5401 return llvm::Intrinsic::ptrauth_strip;
5402 }
5403 llvm_unreachable("bad ptrauth intrinsic");
5404 }();
5405 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5406 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5407
5408 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5409 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5410 OrigValueType->isPointerTy()) {
5411 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5412 }
5413 return RValue::get(Result);
5414 }
5415
5416 case Builtin::BI__exception_code:
5417 case Builtin::BI_exception_code:
5418 return RValue::get(EmitSEHExceptionCode());
5419 case Builtin::BI__exception_info:
5420 case Builtin::BI_exception_info:
5421 return RValue::get(EmitSEHExceptionInfo());
5422 case Builtin::BI__abnormal_termination:
5423 case Builtin::BI_abnormal_termination:
5424 return RValue::get(EmitSEHAbnormalTermination());
5425 case Builtin::BI_setjmpex:
5426 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5427 E->getArg(0)->getType()->isPointerType())
5428 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5429 break;
5430 case Builtin::BI_setjmp:
5431 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5432 E->getArg(0)->getType()->isPointerType()) {
5433 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5434 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5435 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5436 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5437 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5438 }
5439 break;
5440
5441 // C++ std:: builtins.
5442 case Builtin::BImove:
5443 case Builtin::BImove_if_noexcept:
5444 case Builtin::BIforward:
5445 case Builtin::BIforward_like:
5446 case Builtin::BIas_const:
5447 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5448 case Builtin::BI__GetExceptionInfo: {
5449 if (llvm::GlobalVariable *GV =
5451 return RValue::get(GV);
5452 break;
5453 }
5454
5455 case Builtin::BI__fastfail:
5456 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5457
5458 case Builtin::BI__builtin_coro_id:
5459 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5460 case Builtin::BI__builtin_coro_promise:
5461 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5462 case Builtin::BI__builtin_coro_resume:
5463 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5464 return RValue::get(nullptr);
5465 case Builtin::BI__builtin_coro_frame:
5466 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5467 case Builtin::BI__builtin_coro_noop:
5468 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5469 case Builtin::BI__builtin_coro_free:
5470 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5471 case Builtin::BI__builtin_coro_destroy:
5472 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5473 return RValue::get(nullptr);
5474 case Builtin::BI__builtin_coro_done:
5475 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5476 case Builtin::BI__builtin_coro_alloc:
5477 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5478 case Builtin::BI__builtin_coro_begin:
5479 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5480 case Builtin::BI__builtin_coro_end:
5481 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5482 case Builtin::BI__builtin_coro_suspend:
5483 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5484 case Builtin::BI__builtin_coro_size:
5485 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5486 case Builtin::BI__builtin_coro_align:
5487 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5488
5489 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5490 case Builtin::BIread_pipe:
5491 case Builtin::BIwrite_pipe: {
5492 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5493 *Arg1 = EmitScalarExpr(E->getArg(1));
5494 CGOpenCLRuntime OpenCLRT(CGM);
5495 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5496 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5497
5498 // Type of the generic packet parameter.
5499 unsigned GenericAS =
5500 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5501 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5502
5503 // Test which overloaded version we should generate the call for.
5504 if (2U == E->getNumArgs()) {
5505 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5506 : "__write_pipe_2";
5507 // Create a generic function type so the call works with any builtin or
5508 // user-defined type.
5509 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5510 llvm::FunctionType *FTy = llvm::FunctionType::get(
5511 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5512 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5513 return RValue::get(
5514 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5515 {Arg0, BCast, PacketSize, PacketAlign}));
5516 } else {
5517 assert(4 == E->getNumArgs() &&
5518 "Illegal number of parameters to pipe function");
5519 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5520 : "__write_pipe_4";
5521
5522 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5523 Int32Ty, Int32Ty};
5524 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5525 *Arg3 = EmitScalarExpr(E->getArg(3));
5526 llvm::FunctionType *FTy = llvm::FunctionType::get(
5527 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5528 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5529 // We know the third argument is an integer type, but we may need to cast
5530 // it to i32.
5531 if (Arg2->getType() != Int32Ty)
5532 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5533 return RValue::get(
5534 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5535 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5536 }
5537 }
5538 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
5539 // functions
5540 case Builtin::BIreserve_read_pipe:
5541 case Builtin::BIreserve_write_pipe:
5542 case Builtin::BIwork_group_reserve_read_pipe:
5543 case Builtin::BIwork_group_reserve_write_pipe:
5544 case Builtin::BIsub_group_reserve_read_pipe:
5545 case Builtin::BIsub_group_reserve_write_pipe: {
5546 // Composing the mangled name for the function.
5547 const char *Name;
5548 if (BuiltinID == Builtin::BIreserve_read_pipe)
5549 Name = "__reserve_read_pipe";
5550 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5551 Name = "__reserve_write_pipe";
5552 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5553 Name = "__work_group_reserve_read_pipe";
5554 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5555 Name = "__work_group_reserve_write_pipe";
5556 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5557 Name = "__sub_group_reserve_read_pipe";
5558 else
5559 Name = "__sub_group_reserve_write_pipe";
5560
5561 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5562 *Arg1 = EmitScalarExpr(E->getArg(1));
5563 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5564 CGOpenCLRuntime OpenCLRT(CGM);
5565 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5566 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5567
5568 // Building the generic function prototype.
5569 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5570 llvm::FunctionType *FTy = llvm::FunctionType::get(
5571 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5572 // We know the second argument is an integer type, but we may need to cast
5573 // it to i32.
5574 if (Arg1->getType() != Int32Ty)
5575 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5576 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5577 {Arg0, Arg1, PacketSize, PacketAlign}));
5578 }
5579 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5580 // functions
5581 case Builtin::BIcommit_read_pipe:
5582 case Builtin::BIcommit_write_pipe:
5583 case Builtin::BIwork_group_commit_read_pipe:
5584 case Builtin::BIwork_group_commit_write_pipe:
5585 case Builtin::BIsub_group_commit_read_pipe:
5586 case Builtin::BIsub_group_commit_write_pipe: {
5587 const char *Name;
5588 if (BuiltinID == Builtin::BIcommit_read_pipe)
5589 Name = "__commit_read_pipe";
5590 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5591 Name = "__commit_write_pipe";
5592 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5593 Name = "__work_group_commit_read_pipe";
5594 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5595 Name = "__work_group_commit_write_pipe";
5596 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5597 Name = "__sub_group_commit_read_pipe";
5598 else
5599 Name = "__sub_group_commit_write_pipe";
5600
5601 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5602 *Arg1 = EmitScalarExpr(E->getArg(1));
5603 CGOpenCLRuntime OpenCLRT(CGM);
5604 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5605 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5606
5607 // Building the generic function prototype.
5608 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5609 llvm::FunctionType *FTy =
5610 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5611 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5612
5613 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5614 {Arg0, Arg1, PacketSize, PacketAlign}));
5615 }
5616 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5617 case Builtin::BIget_pipe_num_packets:
5618 case Builtin::BIget_pipe_max_packets: {
5619 const char *BaseName;
5620 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5621 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5622 BaseName = "__get_pipe_num_packets";
5623 else
5624 BaseName = "__get_pipe_max_packets";
5625 std::string Name = std::string(BaseName) +
5626 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5627
5628 // Building the generic function prototype.
5629 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5630 CGOpenCLRuntime OpenCLRT(CGM);
5631 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5632 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5633 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5634 llvm::FunctionType *FTy = llvm::FunctionType::get(
5635 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5636
5637 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5638 {Arg0, PacketSize, PacketAlign}));
5639 }
5640
5641 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5642 case Builtin::BIto_global:
5643 case Builtin::BIto_local:
5644 case Builtin::BIto_private: {
5645 auto Arg0 = EmitScalarExpr(E->getArg(0));
5646 auto NewArgT = llvm::PointerType::get(
5649 auto NewRetT = llvm::PointerType::get(
5653 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5654 llvm::Value *NewArg;
5655 if (Arg0->getType()->getPointerAddressSpace() !=
5656 NewArgT->getPointerAddressSpace())
5657 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5658 else
5659 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5660 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5661 auto NewCall =
5662 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5663 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5664 ConvertType(E->getType())));
5665 }
5666
5667 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5668 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5669 // The code below expands the builtin call to a call to one of the following
5670 // functions that an OpenCL runtime library will have to provide:
5671 // __enqueue_kernel_basic
5672 // __enqueue_kernel_varargs
5673 // __enqueue_kernel_basic_events
5674 // __enqueue_kernel_events_varargs
5675 case Builtin::BIenqueue_kernel: {
5676 StringRef Name; // Generated function call name
5677 unsigned NumArgs = E->getNumArgs();
5678
5679 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5680 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5681 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5682
5683 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5684 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5685 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5686 llvm::Value *Range = NDRangeL.getAddress(*this).emitRawPointer(*this);
5687 llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5688
5689 if (NumArgs == 4) {
5690 // The most basic form of the call with parameters:
5691 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5692 Name = "__enqueue_kernel_basic";
5693 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5694 GenericVoidPtrTy};
5695 llvm::FunctionType *FTy = llvm::FunctionType::get(
5696 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5697
5698 auto Info =
5700 llvm::Value *Kernel =
5701 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5702 llvm::Value *Block =
5703 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5704
5705 AttrBuilder B(Builder.getContext());
5706 B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5707 llvm::AttributeList ByValAttrSet =
5708 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5709
5710 auto RTCall =
5711 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5712 {Queue, Flags, Range, Kernel, Block});
5713 RTCall->setAttributes(ByValAttrSet);
5714 return RValue::get(RTCall);
5715 }
5716 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5717
5718 // Create a temporary array to hold the sizes of local pointer arguments
5719 // for the block. \p First is the position of the first size argument.
5720 auto CreateArrayForSizeVar = [=](unsigned First)
5721 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5722 llvm::APInt ArraySize(32, NumArgs - First);
5724 getContext().getSizeType(), ArraySize, nullptr,
5726 /*IndexTypeQuals=*/0);
5727 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5728 llvm::Value *TmpPtr = Tmp.getPointer();
5729 llvm::Value *TmpSize = EmitLifetimeStart(
5730 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5731 llvm::Value *ElemPtr;
5732 // Each of the following arguments specifies the size of the corresponding
5733 // argument passed to the enqueued block.
5734 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5735 for (unsigned I = First; I < NumArgs; ++I) {
5736 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5737 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5738 {Zero, Index});
5739 if (I == First)
5740 ElemPtr = GEP;
5741 auto *V =
5742 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5744 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5745 }
5746 return std::tie(ElemPtr, TmpSize, TmpPtr);
5747 };
5748
5749 // Could have events and/or varargs.
5750 if (E->getArg(3)->getType()->isBlockPointerType()) {
5751 // No events passed, but has variadic arguments.
5752 Name = "__enqueue_kernel_varargs";
5753 auto Info =
5755 llvm::Value *Kernel =
5756 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5757 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5758 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5759 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5760
5761 // Create a vector of the arguments, as well as a constant value to
5762 // express to the runtime the number of variadic arguments.
5763 llvm::Value *const Args[] = {Queue, Flags,
5764 Range, Kernel,
5765 Block, ConstantInt::get(IntTy, NumArgs - 4),
5766 ElemPtr};
5767 llvm::Type *const ArgTys[] = {
5768 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5769 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5770
5771 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5772 auto Call = RValue::get(
5773 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5774 if (TmpSize)
5775 EmitLifetimeEnd(TmpSize, TmpPtr);
5776 return Call;
5777 }
5778 // Any call that reaches this point has event arguments.
5779 if (NumArgs >= 7) {
5780 llvm::PointerType *PtrTy = llvm::PointerType::get(
5783
5784 llvm::Value *NumEvents =
5785 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5786
5787 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5788 // to be a null pointer constant (including a `0` literal), we can detect
5789 // that case and emit a null pointer directly.
5790 llvm::Value *EventWaitList = nullptr;
5791 if (E->getArg(4)->isNullPointerConstant(
5793 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5794 } else {
5795 EventWaitList =
5796 E->getArg(4)->getType()->isArrayType()
5798 : EmitScalarExpr(E->getArg(4));
5799 // Convert to generic address space.
5800 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5801 }
5802 llvm::Value *EventRet = nullptr;
5803 if (E->getArg(5)->isNullPointerConstant(
5805 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5806 } else {
5807 EventRet =
5808 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5809 }
5810
5811 auto Info =
5813 llvm::Value *Kernel =
5814 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5815 llvm::Value *Block =
5816 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5817
5818 std::vector<llvm::Type *> ArgTys = {
5819 QueueTy, Int32Ty, RangeTy, Int32Ty,
5820 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5821
5822 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5823 NumEvents, EventWaitList, EventRet,
5824 Kernel, Block};
5825
5826 if (NumArgs == 7) {
5827 // Has events but no variadics.
5828 Name = "__enqueue_kernel_basic_events";
5829 llvm::FunctionType *FTy = llvm::FunctionType::get(
5830 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5831 return RValue::get(
5834 }
5835 // Has event info and variadics
5836 // Pass the number of variadics to the runtime function too.
5837 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5838 ArgTys.push_back(Int32Ty);
5839 Name = "__enqueue_kernel_events_varargs";
5840
5841 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5842 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5843 Args.push_back(ElemPtr);
5844 ArgTys.push_back(ElemPtr->getType());
5845
5846 llvm::FunctionType *FTy = llvm::FunctionType::get(
5847 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5848 auto Call =
5851 if (TmpSize)
5852 EmitLifetimeEnd(TmpSize, TmpPtr);
5853 return Call;
5854 }
5855 llvm_unreachable("Unexpected enqueue_kernel signature");
5856 }
5857 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5858 // parameter.
5859 case Builtin::BIget_kernel_work_group_size: {
5860 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5861 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5862 auto Info =
5864 Value *Kernel =
5865 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5866 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5869 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5870 false),
5871 "__get_kernel_work_group_size_impl"),
5872 {Kernel, Arg}));
5873 }
5874 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5875 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5876 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5877 auto Info =
5879 Value *Kernel =
5880 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5881 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5884 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5885 false),
5886 "__get_kernel_preferred_work_group_size_multiple_impl"),
5887 {Kernel, Arg}));
5888 }
5889 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5890 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5891 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5892 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5893 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5894 llvm::Value *NDRange = NDRangeL.getAddress(*this).emitRawPointer(*this);
5895 auto Info =
5897 Value *Kernel =
5898 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5899 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5900 const char *Name =
5901 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5902 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5903 : "__get_kernel_sub_group_count_for_ndrange_impl";
5906 llvm::FunctionType::get(
5907 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5908 false),
5909 Name),
5910 {NDRange, Kernel, Block}));
5911 }
5912 case Builtin::BI__builtin_store_half:
5913 case Builtin::BI__builtin_store_halff: {
5914 Value *Val = EmitScalarExpr(E->getArg(0));
5916 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5917 Builder.CreateStore(HalfVal, Address);
5918 return RValue::get(nullptr);
5919 }
5920 case Builtin::BI__builtin_load_half: {
5922 Value *HalfVal = Builder.CreateLoad(Address);
5923 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5924 }
5925 case Builtin::BI__builtin_load_halff: {
5927 Value *HalfVal = Builder.CreateLoad(Address);
5928 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5929 }
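
Both halves of this lowering are a single conversion plus a memory access: the store truncates the value to half before storing, and the loads widen the half back to float or double. A minimal sketch of the same IR pattern with a bare IRBuilder (raw pointers instead of clang's Address abstraction; purely illustrative):

#include "llvm/IR/IRBuilder.h"

// Sketch: __builtin_store_halff-style store and __builtin_load_halff-style load.
llvm::Value *storeAsHalf(llvm::IRBuilder<> &B, llvm::Value *FloatVal,
                         llvm::Value *Ptr) {
  llvm::Value *Half = B.CreateFPTrunc(FloatVal, B.getHalfTy());
  return B.CreateStore(Half, Ptr);
}

llvm::Value *loadHalfAsFloat(llvm::IRBuilder<> &B, llvm::Value *Ptr) {
  llvm::Value *Half = B.CreateLoad(B.getHalfTy(), Ptr);
  return B.CreateFPExt(Half, B.getFloatTy()); // getDoubleTy() for __builtin_load_half
}
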
5930 case Builtin::BI__builtin_printf:
5931 case Builtin::BIprintf:
5932 if (getTarget().getTriple().isNVPTX() ||
5933 getTarget().getTriple().isAMDGCN()) {
5934 if (getLangOpts().OpenMPIsTargetDevice)
5935 return EmitOpenMPDevicePrintfCallExpr(E);
5936 if (getTarget().getTriple().isNVPTX())
5937 return EmitNVPTXDevicePrintfCallExpr(E);
5938 if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5939 return EmitAMDGPUDevicePrintfCallExpr(E);
5940 }
5941
5942 break;
5943 case Builtin::BI__builtin_canonicalize:
5944 case Builtin::BI__builtin_canonicalizef:
5945 case Builtin::BI__builtin_canonicalizef16:
5946 case Builtin::BI__builtin_canonicalizel:
5947 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5948
5949 case Builtin::BI__builtin_thread_pointer: {
5950 if (!getContext().getTargetInfo().isTLSSupported())
5951 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5952 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5953 break;
5954 }
5955 case Builtin::BI__builtin_os_log_format:
5956 return emitBuiltinOSLogFormat(*E);
5957
5958 case Builtin::BI__xray_customevent: {
5960 return RValue::getIgnored();
5961
5964 return RValue::getIgnored();
5965
5966 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5967 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5968 return RValue::getIgnored();
5969
5970 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5971 auto FTy = F->getFunctionType();
5972 auto Arg0 = E->getArg(0);
5973 auto Arg0Val = EmitScalarExpr(Arg0);
5974 auto Arg0Ty = Arg0->getType();
5975 auto PTy0 = FTy->getParamType(0);
5976 if (PTy0 != Arg0Val->getType()) {
5977 if (Arg0Ty->isArrayType())
5978 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
5979 else
5980 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5981 }
5982 auto Arg1 = EmitScalarExpr(E->getArg(1));
5983 auto PTy1 = FTy->getParamType(1);
5984 if (PTy1 != Arg1->getType())
5985 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5986 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5987 }
5988
5989 case Builtin::BI__xray_typedevent: {
5990 // TODO: There should be a way to always emit events even if the current
5991 // function is not instrumented. Losing events in a stream can cripple
5992 // a trace.
5994 return RValue::getIgnored();
5995
5998 return RValue::getIgnored();
5999
6000 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6001 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6002 return RValue::getIgnored();
6003
6004 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6005 auto FTy = F->getFunctionType();
6006 auto Arg0 = EmitScalarExpr(E->getArg(0));
6007 auto PTy0 = FTy->getParamType(0);
6008 if (PTy0 != Arg0->getType())
6009 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6010 auto Arg1 = E->getArg(1);
6011 auto Arg1Val = EmitScalarExpr(Arg1);
6012 auto Arg1Ty = Arg1->getType();
6013 auto PTy1 = FTy->getParamType(1);
6014 if (PTy1 != Arg1Val->getType()) {
6015 if (Arg1Ty->isArrayType())
6016 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6017 else
6018 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6019 }
6020 auto Arg2 = EmitScalarExpr(E->getArg(2));
6021 auto PTy2 = FTy->getParamType(2);
6022 if (PTy2 != Arg2->getType())
6023 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6024 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6025 }
6026
6027 case Builtin::BI__builtin_ms_va_start:
6028 case Builtin::BI__builtin_ms_va_end:
6029 return RValue::get(
6031 BuiltinID == Builtin::BI__builtin_ms_va_start));
6032
6033 case Builtin::BI__builtin_ms_va_copy: {
6034 // Lower this manually. We can't reliably determine whether or not any
6035 // given va_copy() is for a Win64 va_list from the calling convention
6036 // alone, because it's legal to do this from a System V ABI function.
6037 // With opaque pointer types, we won't have enough information in LLVM
6038 // IR to determine this from the argument types, either. Best to do it
6039 // now, while we have enough information.
6040 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6041 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6042
6043 DestAddr = DestAddr.withElementType(Int8PtrTy);
6044 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6045
6046 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6047 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6048 }
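
Because the Win64 va_list is just a char* cursor, the copy really is one load and one store, exactly as emitted above. A small usage sketch, assuming an x86-64 target that accepts __attribute__((ms_abi)):

// Usage sketch only: a System V ABI translation unit can still define an
// ms_abi variadic function, which is why the copy is lowered here rather than
// inferred later from the calling convention.
void __attribute__((ms_abi)) consume_args(int n, ...) {
  __builtin_ms_va_list ap, aq;
  __builtin_ms_va_start(ap, n);
  __builtin_ms_va_copy(aq, ap); // copies the char* cursor: load from ap, store to aq
  __builtin_ms_va_end(aq);
  __builtin_ms_va_end(ap);
}
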
6049
6050 case Builtin::BI__builtin_get_device_side_mangled_name: {
6051 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6052 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6053 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6054 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
6055 llvm::ConstantInt::get(SizeTy, 0)};
6056 auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
6057 Str.getPointer(), Zeros);
6058 return RValue::get(Ptr);
6059 }
6060 }
6061
6062 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6063 // the call using the normal call path, but using the unmangled
6064 // version of the function name.
6065 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6066 return emitLibraryCall(*this, FD, E,
6067 CGM.getBuiltinLibFunction(FD, BuiltinID));
6068
6069 // If this is a predefined lib function (e.g. malloc), emit the call
6070 // using exactly the normal call path.
6071 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6072 return emitLibraryCall(
6073 *this, FD, E, cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
6074
6075 // Check that a call to a target specific builtin has the correct target
6076 // features.
6077 // This is done down here to avoid checking non-target-specific builtins; however,
6078 // if generic builtins start to require generic target features, then we
6079 // can move this up to the beginning of the function.
6080 checkTargetFeatures(E, FD);
6081
6082 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6083 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6084
6085 // See if we have a target specific intrinsic.
6086 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6087 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6088 StringRef Prefix =
6089 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6090 if (!Prefix.empty()) {
6091 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6092 // NOTE: we don't need to perform a compatibility flag check here since the
6093 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
6094 // MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
6095 if (IntrinsicID == Intrinsic::not_intrinsic)
6096 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6097 }
6098
6099 if (IntrinsicID != Intrinsic::not_intrinsic) {
6100 SmallVector<Value *, 16> Args;
6101
6102 // Find out if any arguments are required to be integer constant
6103 // expressions.
6104 unsigned ICEArguments = 0;
6105 ASTContext::GetBuiltinTypeError Error;
6106 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6107 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6108
6109 Function *F = CGM.getIntrinsic(IntrinsicID);
6110 llvm::FunctionType *FTy = F->getFunctionType();
6111
6112 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6113 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6114 // If the intrinsic arg type is different from the builtin arg type
6115 // we need to do a bit cast.
6116 llvm::Type *PTy = FTy->getParamType(i);
6117 if (PTy != ArgValue->getType()) {
6118 // XXX - vector of pointers?
6119 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6120 if (PtrTy->getAddressSpace() !=
6121 ArgValue->getType()->getPointerAddressSpace()) {
6122 ArgValue = Builder.CreateAddrSpaceCast(
6123 ArgValue, llvm::PointerType::get(getLLVMContext(),
6124 PtrTy->getAddressSpace()));
6125 }
6126 }
6127
6128 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6129 // in AMX intrinsics.
6130 if (PTy->isX86_AMXTy())
6131 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6132 {ArgValue->getType()}, {ArgValue});
6133 else
6134 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6135 }
6136
6137 Args.push_back(ArgValue);
6138 }
6139
6140 Value *V = Builder.CreateCall(F, Args);
6141 QualType BuiltinRetType = E->getType();
6142
6143 llvm::Type *RetTy = VoidTy;
6144 if (!BuiltinRetType->isVoidType())
6145 RetTy = ConvertType(BuiltinRetType);
6146
6147 if (RetTy != V->getType()) {
6148 // XXX - vector of pointers?
6149 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6150 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6151 V = Builder.CreateAddrSpaceCast(
6152 V, llvm::PointerType::get(getLLVMContext(),
6153 PtrTy->getAddressSpace()));
6154 }
6155 }
6156
6157 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6158 // in AMX intrinsics.
6159 if (V->getType()->isX86_AMXTy())
6160 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6161 {V});
6162 else
6163 V = Builder.CreateBitCast(V, RetTy);
6164 }
6165
6166 if (RetTy->isVoidTy())
6167 return RValue::get(nullptr);
6168
6169 return RValue::get(V);
6170 }
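
The argument and return-value coercion above only bridges small representation mismatches: pointer address-space differences become addrspacecasts, AMX tiles go through the dedicated cast intrinsics, and everything else is a plain bitcast. A compact sketch of that three-way decision for a single argument (not the CodeGenFunction code itself):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"

// Sketch: coerce one already-emitted argument to the intrinsic's parameter type.
llvm::Value *coerceArg(llvm::IRBuilder<> &B, llvm::Value *Arg,
                       llvm::Type *ParamTy) {
  if (ParamTy == Arg->getType())
    return Arg;                                     // nothing to do
  if (auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(ParamTy)) {
    // Assumes Arg is itself a pointer here, as in the loop above.
    if (PtrTy->getAddressSpace() != Arg->getType()->getPointerAddressSpace())
      return B.CreateAddrSpaceCast(Arg, ParamTy);   // bridge address spaces
  }
  if (ParamTy->isX86_AMXTy())                       // vector -> AMX tile
    return B.CreateIntrinsic(llvm::Intrinsic::x86_cast_vector_to_tile,
                             {Arg->getType()}, {Arg});
  return B.CreateBitCast(Arg, ParamTy);             // same-width reinterpret
}
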
6171
6172 // Some target-specific builtins can have aggregate return values, e.g.
6173 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6174 // ReturnValue to be non-null, so that the target-specific emission code can
6175 // always just emit into it.
6176 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6177 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6178 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6179 ReturnValue = ReturnValueSlot(DestPtr, false);
6180 }
6181
6182 // Now see if we can emit a target-specific builtin.
6183 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6184 switch (EvalKind) {
6185 case TEK_Scalar:
6186 if (V->getType()->isVoidTy())
6187 return RValue::get(nullptr);
6188 return RValue::get(V);
6189 case TEK_Aggregate:
6190 return RValue::getAggregate(ReturnValue.getAddress(),
6191 ReturnValue.isVolatile());
6192 case TEK_Complex:
6193 llvm_unreachable("No current target builtin returns complex");
6194 }
6195 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6196 }
6197
6198 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6199 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6200 return RValue::get(V);
6201
6202 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6203 return EmitHipStdParUnsupportedBuiltin(this, FD);
6204
6205 ErrorUnsupported(E, "builtin function");
6206
6207 // Unknown builtin, for now just dump it out and return undef.
6208 return GetUndefRValue(E->getType());
6209}
6210
6211static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6212 unsigned BuiltinID, const CallExpr *E,
6213 ReturnValueSlot ReturnValue,
6214 llvm::Triple::ArchType Arch) {
6215 // When compiling in HipStdPar mode we have to be conservative in rejecting
6216 // target specific features in the FE, and defer the possible error to the
6217 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6218 // referenced by an accelerator executable function, we emit an error.
6219 // Returning nullptr here leads to the builtin being handled in
6220 // EmitHipStdParUnsupportedBuiltin.
6221 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6222 Arch != CGF->getTarget().getTriple().getArch())
6223 return nullptr;
6224
6225 switch (Arch) {
6226 case llvm::Triple::arm:
6227 case llvm::Triple::armeb:
6228 case llvm::Triple::thumb:
6229 case llvm::Triple::thumbeb:
6230 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6231 case llvm::Triple::aarch64:
6232 case llvm::Triple::aarch64_32:
6233 case llvm::Triple::aarch64_be:
6234 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6235 case llvm::Triple::bpfeb:
6236 case llvm::Triple::bpfel:
6237 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6238 case llvm::Triple::x86:
6239 case llvm::Triple::x86_64:
6240 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6241 case llvm::Triple::ppc:
6242 case llvm::Triple::ppcle:
6243 case llvm::Triple::ppc64:
6244 case llvm::Triple::ppc64le:
6245 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6246 case llvm::Triple::r600:
6247 case llvm::Triple::amdgcn:
6248 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6249 case llvm::Triple::systemz:
6250 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6251 case llvm::Triple::nvptx:
6252 case llvm::Triple::nvptx64:
6253 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6254 case llvm::Triple::wasm32:
6255 case llvm::Triple::wasm64:
6256 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6257 case llvm::Triple::hexagon:
6258 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6259 case llvm::Triple::riscv32:
6260 case llvm::Triple::riscv64:
6261 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6262 default:
6263 return nullptr;
6264 }
6265}
6266
6267Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6268 const CallExpr *E,
6269 ReturnValueSlot ReturnValue) {
6270 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6271 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6272 return EmitTargetArchBuiltinExpr(
6273 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6274 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6275 }
6276
6277 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6278 getTarget().getTriple().getArch());
6279}
6280
6281static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6282 NeonTypeFlags TypeFlags,
6283 bool HasLegalHalfType = true,
6284 bool V1Ty = false,
6285 bool AllowBFloatArgsAndRet = true) {
6286 int IsQuad = TypeFlags.isQuad();
6287 switch (TypeFlags.getEltType()) {
6288 case NeonTypeFlags::Int8:
6289 case NeonTypeFlags::Poly8:
6290 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6291 case NeonTypeFlags::Int16:
6292 case NeonTypeFlags::Poly16:
6293 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6294 case NeonTypeFlags::BFloat16:
6295 if (AllowBFloatArgsAndRet)
6296 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6297 else
6298 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6299 case NeonTypeFlags::Float16:
6300 if (HasLegalHalfType)
6301 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6302 else
6303 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6304 case NeonTypeFlags::Int32:
6305 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6306 case NeonTypeFlags::Int64:
6307 case NeonTypeFlags::Poly64:
6308 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6309 case NeonTypeFlags::Poly128:
6310 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
6311 // a lot of the i128 and f128 API is still missing,
6312 // so we use v16i8 to represent poly128 and rely on pattern matching.
6313 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6314 case NeonTypeFlags::Float32:
6315 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6316 case NeonTypeFlags::Float64:
6317 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6318 }
6319 llvm_unreachable("Unknown vector element type!");
6320}
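
The lane counts follow directly from the 64-bit (D register) versus 128-bit (Q register) split: the shift by IsQuad doubles the element count, so every resulting vector is exactly 64 or 128 bits wide. A tiny compile-time check of that arithmetic:

// Worked example of the lane-count arithmetic in GetNeonType: for quad types
// (IsQuad == 1) each element count doubles, keeping the total width at 128 bits.
static_assert((8 << 1) * 8 == 128 &&   // v16i8
              (4 << 1) * 16 == 128 &&  // v8i16 / v8f16 / v8bf16
              (2 << 1) * 32 == 128 &&  // v4i32 / v4f32
              (1 << 1) * 64 == 128,    // v2i64 / v2f64
              "quad NEON vectors are 128 bits wide");
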
6321
6322static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6323 NeonTypeFlags IntTypeFlags) {
6324 int IsQuad = IntTypeFlags.isQuad();
6325 switch (IntTypeFlags.getEltType()) {
6326 case NeonTypeFlags::Int16:
6327 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6328 case NeonTypeFlags::Int32:
6329 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6330 case NeonTypeFlags::Int64:
6331 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6332 default:
6333 llvm_unreachable("Type can't be converted to floating-point!");
6334 }
6335}
6336
6337Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6338 const ElementCount &Count) {
6339 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6340 return Builder.CreateShuffleVector(V, V, SV, "lane");
6341}
6342
6343Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6344 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6345 return EmitNeonSplat(V, C, EC);
6346}
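
Splatting a lane this way is nothing more than a shuffle whose mask repeats the chosen lane index for every result element. An equivalent stand-alone sketch using the ArrayRef<int> mask overload of CreateShuffleVector:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Sketch: broadcast lane `Lane` of the fixed-width vector V to every element.
llvm::Value *splatLane(llvm::IRBuilder<> &B, llvm::Value *V, unsigned Lane) {
  auto *VTy = llvm::cast<llvm::FixedVectorType>(V->getType());
  llvm::SmallVector<int, 16> Mask(VTy->getNumElements(), static_cast<int>(Lane));
  return B.CreateShuffleVector(V, V, Mask, "lane");
}
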
6347
6348Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6349 const char *name,
6350 unsigned shift, bool rightshift) {
6351 unsigned j = 0;
6352 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6353 ai != ae; ++ai, ++j) {
6354 if (F->isConstrainedFPIntrinsic())
6355 if (ai->getType()->isMetadataTy())
6356 continue;
6357 if (shift > 0 && shift == j)
6358 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6359 else
6360 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6361 }
6362
6363 if (F->isConstrainedFPIntrinsic())
6364 return Builder.CreateConstrainedFPCall(F, Ops, name);
6365 else
6366 return Builder.CreateCall(F, Ops, name);
6367}
6368
6369Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6370 bool neg) {
6371 int SV = cast<ConstantInt>(V)->getSExtValue();
6372 return ConstantInt::get(Ty, neg ? -SV : SV);
6373}
6374
6375// Right-shift a vector by a constant.
6376Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6377 llvm::Type *Ty, bool usgn,
6378 const char *name) {
6379 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6380
6381 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6382 int EltSize = VTy->getScalarSizeInBits();
6383
6384 Vec = Builder.CreateBitCast(Vec, Ty);
6385
6386 // lshr/ashr are undefined when the shift amount is equal to the vector
6387 // element size.
6388 if (ShiftAmt == EltSize) {
6389 if (usgn) {
6390 // Right-shifting an unsigned value by its size yields 0.
6391 return llvm::ConstantAggregateZero::get(VTy);
6392 } else {
6393 // Right-shifting a signed value by its size is equivalent
6394 // to a shift of size-1.
6395 --ShiftAmt;
6396 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6397 }
6398 }
6399
6400 Shift = EmitNeonShiftVector(Shift, Ty, false);
6401 if (usgn)
6402 return Builder.CreateLShr(Vec, Shift, name);
6403 else
6404 return Builder.CreateAShr(Vec, Shift, name);
6405}
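
The adjustment exists because an IR shift by the full element width is not a well-defined value, while the NEON immediate encodings allow it: for unsigned data the result is simply zero, and for signed data a shift by size-1 already replicates the sign bit into every position. A plain C++ illustration of the signed case (arithmetic right shift of negative values is only guaranteed since C++20, but matches common practice before that):

#include <cassert>
#include <cstdint>

int main() {
  int8_t neg = -42, pos = 42;
  // Shifting an 8-bit lane right by 7 (element size minus one) fills the lane
  // with copies of the sign bit, which is the intended meaning of "shift by 8".
  assert((neg >> 7) == -1); // all ones
  assert((pos >> 7) == 0);  // all zeros
  return 0;
}
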
6406
6407enum {
6408 AddRetType = (1 << 0),
6409 Add1ArgType = (1 << 1),
6410 Add2ArgTypes = (1 << 2),
6411
6412 VectorizeRetType = (1 << 3),
6413 VectorizeArgTypes = (1 << 4),
6414
6415 InventFloatType = (1 << 5),
6416 UnsignedAlts = (1 << 6),
6417
6418 Use64BitVectors = (1 << 7),
6419 Use128BitVectors = (1 << 8),
6420
6421 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6422 VectorRet = AddRetType | VectorizeRetType,
6423 VectorRetGetArgs01 =
6424 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6425 FpCmpzModifiers =
6426 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6427 };
6428
6429namespace {
6430struct ARMVectorIntrinsicInfo {
6431 const char *NameHint;
6432 unsigned BuiltinID;
6433 unsigned LLVMIntrinsic;
6434 unsigned AltLLVMIntrinsic;
6435 unsigned TypeModifier;
6436
6437 bool operator<(unsigned RHSBuiltinID) const {
6438 return BuiltinID < RHSBuiltinID;
6439 }
6440 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6441 return BuiltinID < TE.BuiltinID;
6442 }
6443};
6444} // end anonymous namespace
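
The operator< overloads exist so that the intrinsic tables below can be binary-searched by builtin ID. A hedged sketch of such a lookup follows; the actual helper in this file may differ in name and in any extra sorted-ness checking it performs.

#include <algorithm>
#include "llvm/ADT/ArrayRef.h"

// Sketch: find an entry by BuiltinID in a table sorted by BuiltinID.
static const ARMVectorIntrinsicInfo *
findIntrinsicInfo(llvm::ArrayRef<ARMVectorIntrinsicInfo> Map,
                  unsigned BuiltinID) {
  const ARMVectorIntrinsicInfo *It =
      std::lower_bound(Map.begin(), Map.end(), BuiltinID);
  if (It != Map.end() && It->BuiltinID == BuiltinID)
    return It;
  return nullptr;
}
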
6445
6446#define NEONMAP0(NameBase) \
6447 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6448
6449#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6450 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6451 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6452
6453#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6454 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6455 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6456 TypeModifier }
6457
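
For readability, here is what one NEONMAP1 entry expands to; the fourth field (AltLLVMIntrinsic) is always zero for NEONMAP1, and the final field is the TypeModifier passed to the macro.

// NEONMAP1(vabs_v, arm_neon_vabs, 0) produces an initializer equivalent to:
static const ARMVectorIntrinsicInfo ExampleEntry = {
    "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0};
// Fields: NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier.
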
6458static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6459 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6460 NEONMAP0(splat_lane_v),
6461 NEONMAP0(splat_laneq_v),
6462 NEONMAP0(splatq_lane_v),
6463 NEONMAP0(splatq_laneq_v),
6464 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6465 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6466 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6467 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6468 NEONMAP0(vadd_v),
6469 NEONMAP0(vaddhn_v),
6470 NEONMAP0(vaddq_v),
6471 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6472 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6473 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6474 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6475 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6476 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6477 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6478 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6479 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6480 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6481 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6482 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6483 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6484 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6485 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6486 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6487 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6488 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6489 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6490 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6491 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6492 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6493 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6494 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6495 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6496 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6497 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6498 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6499 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6500 NEONMAP0(vceqz_v),
6501 NEONMAP0(vceqzq_v),
6502 NEONMAP0(vcgez_v),
6503 NEONMAP0(vcgezq_v),
6504 NEONMAP0(vcgtz_v),
6505 NEONMAP0(vcgtzq_v),
6506 NEONMAP0(vclez_v),
6507 NEONMAP0(vclezq_v),
6508 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6509 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6510 NEONMAP0(vcltz_v),
6511 NEONMAP0(vcltzq_v),
6512 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6513 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6514 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6515 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6516 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6517 NEONMAP0(vcvt_f16_s16),
6518 NEONMAP0(vcvt_f16_u16),
6519 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6520 NEONMAP0(vcvt_f32_v),
6521 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6522 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6523 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6524 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6525 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6526 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6527 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6528 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6529 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6530 NEONMAP0(vcvt_s16_f16),
6531 NEONMAP0(vcvt_s32_v),
6532 NEONMAP0(vcvt_s64_v),
6533 NEONMAP0(vcvt_u16_f16),
6534 NEONMAP0(vcvt_u32_v),
6535 NEONMAP0(vcvt_u64_v),
6536 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6537 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6538 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6539 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6540 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6541 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6542 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6543 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6544 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6545 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6546 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6547 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6548 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6549 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6550 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6551 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6552 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6553 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6554 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6555 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6556 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6557 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6558 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6559 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6560 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6561 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6562 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6563 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6564 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6565 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6566 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6567 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6568 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6569 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6570 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6571 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6572 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6573 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6574 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6575 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6576 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6577 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6578 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6579 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6580 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6581 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6582 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6583 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6584 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6585 NEONMAP0(vcvtq_f16_s16),
6586 NEONMAP0(vcvtq_f16_u16),
6587 NEONMAP0(vcvtq_f32_v),
6588 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6589 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6590 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6591 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6592 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6593 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6594 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6595 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6596 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6597 NEONMAP0(vcvtq_s16_f16),
6598 NEONMAP0(vcvtq_s32_v),
6599 NEONMAP0(vcvtq_s64_v),
6600 NEONMAP0(vcvtq_u16_f16),
6601 NEONMAP0(vcvtq_u32_v),
6602 NEONMAP0(vcvtq_u64_v),
6603 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6604 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6605 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6606 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6607 NEONMAP0(vext_v),
6608 NEONMAP0(vextq_v),
6609 NEONMAP0(vfma_v),
6610 NEONMAP0(vfmaq_v),
6611 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6612 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6613 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6614 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6615 NEONMAP0(vld1_dup_v),
6616 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6617 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6618 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6619 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6620 NEONMAP0(vld1q_dup_v),
6621 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6622 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6623 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6624 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6625 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6626 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6627 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6628 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6629 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6630 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6631 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6632 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6633 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6634 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6635 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6636 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6637 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6638 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6639 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6640 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6641 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6642 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6643 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6644 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6645 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6646 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6647 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6648 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6649 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6650 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6651 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6652 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6653 NEONMAP0(vmovl_v),
6654 NEONMAP0(vmovn_v),
6655 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6656 NEONMAP0(vmull_v),
6657 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6658 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6659 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6660 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6661 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6662 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6663 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6664 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6665 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6666 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6667 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6668 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6669 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6670 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6671 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6672 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6673 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6674 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6675 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6676 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6677 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6678 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6679 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6680 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6681 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6682 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6683 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6684 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6685 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6686 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6687 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6688 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6689 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6690 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6691 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6692 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6693 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6694 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6695 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6696 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6697 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6698 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6699 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6700 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6701 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6702 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6703 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6704 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6705 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6706 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6707 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6708 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6709 NEONMAP0(vrndi_v),
6710 NEONMAP0(vrndiq_v),
6711 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6712 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6713 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6714 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6715 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6716 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6717 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6718 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6719 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6720 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6721 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6722 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6723 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6724 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6725 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6726 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6727 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6728 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6729 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6730 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6731 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6732 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6733 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6734 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6735 NEONMAP0(vshl_n_v),
6736 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6737 NEONMAP0(vshll_n_v),
6738 NEONMAP0(vshlq_n_v),
6739 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6740 NEONMAP0(vshr_n_v),
6741 NEONMAP0(vshrn_n_v),
6742 NEONMAP0(vshrq_n_v),
6743 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6744 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6745 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6746 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6747 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6748 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6749 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6750 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6751 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6752 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6753 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6754 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6755 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6756 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6757 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6758 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6759 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6760 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6761 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6762 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6763 NEONMAP0(vsubhn_v),
6764 NEONMAP0(vtrn_v),
6765 NEONMAP0(vtrnq_v),
6766 NEONMAP0(vtst_v),
6767 NEONMAP0(vtstq_v),
6768 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6769 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6770 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6771 NEONMAP0(vuzp_v),
6772 NEONMAP0(vuzpq_v),
6773 NEONMAP0(vzip_v),
6774 NEONMAP0(vzipq_v)
6775};
6776
6777static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6778 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6779 NEONMAP0(splat_lane_v),
6780 NEONMAP0(splat_laneq_v),
6781 NEONMAP0(splatq_lane_v),
6782 NEONMAP0(splatq_laneq_v),
6783 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6784 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6785 NEONMAP0(vadd_v),
6786 NEONMAP0(vaddhn_v),
6787 NEONMAP0(vaddq_p128),
6788 NEONMAP0(vaddq_v),
6789 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6790 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6791 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6792 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6793 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6794 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6795 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6796 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6797 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6798 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6799 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6800 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6801 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6802 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6803 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6804 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6805 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6806 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6807 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6808 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6809 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6810 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6811 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6812 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6813 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6814 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6815 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6816 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6817 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6818 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6819 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6820 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6821 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6822 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6823 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6824 NEONMAP0(vceqz_v),
6825 NEONMAP0(vceqzq_v),
6826 NEONMAP0(vcgez_v),
6827 NEONMAP0(vcgezq_v),
6828 NEONMAP0(vcgtz_v),
6829 NEONMAP0(vcgtzq_v),
6830 NEONMAP0(vclez_v),
6831 NEONMAP0(vclezq_v),
6832 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6833 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6834 NEONMAP0(vcltz_v),
6835 NEONMAP0(vcltzq_v),
6836 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6837 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6838 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6839 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6840 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6841 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6842 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6843 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6844 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6845 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6846 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6847 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6848 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6849 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6850 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6851 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6852 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6853 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6854 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6855 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6856 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6857 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6858 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6859 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6860 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6861 NEONMAP0(vcvt_f16_s16),
6862 NEONMAP0(vcvt_f16_u16),
6863 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6864 NEONMAP0(vcvt_f32_v),
6865 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6866 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6867 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6868 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6869 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6870 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6871 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6872 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6873 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6874 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6875 NEONMAP0(vcvtq_f16_s16),
6876 NEONMAP0(vcvtq_f16_u16),
6877 NEONMAP0(vcvtq_f32_v),
6878 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6879 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6880 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6881 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6882 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6883 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6884 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6885 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6886 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6887 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6888 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6889 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6890 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6891 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6892 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6893 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6894 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6895 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6896 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6897 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6898 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6899 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6900 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6901 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6902 NEONMAP0(vext_v),
6903 NEONMAP0(vextq_v),
6904 NEONMAP0(vfma_v),
6905 NEONMAP0(vfmaq_v),
6906 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6907 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6908 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6909 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6910 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6911 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6912 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6913 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6914 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6915 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6916 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6917 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6918 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6919 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6920 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6921 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6922 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6923 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6924 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6925 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6926 NEONMAP0(vmovl_v),
6927 NEONMAP0(vmovn_v),
6928 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6929 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6930 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6931 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6932 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6933 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6934 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6935 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6936 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6937 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6938 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6939 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6940 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6941 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6942 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6943 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6944 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6945 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6946 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6947 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6948 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6949 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6950 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6951 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6952 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6953 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6954 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6955 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6956 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6957 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6958 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6959 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6960 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6961 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6962 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6963 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6964 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6965 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6966 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6967 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6968 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6969 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
6970 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6971 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6972 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6973 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6974 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6975 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6976 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6977 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6978 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6979 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6980 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6981 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6982 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6983 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6984 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
6985 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6986 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
6987 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6988 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
6989 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6990 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
6991 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6992 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
6993 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6994 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
6995 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6996 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
6997 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6998 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
6999 NEONMAP0(vrndi_v),
7000 NEONMAP0(vrndiq_v),
7001 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7002 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7003 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7004 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7005 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7006 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7007 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7008 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7009 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7010 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7011 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7012 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7013 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7014 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7015 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7016 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7017 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7018 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7019 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7020 NEONMAP0(vshl_n_v),
7021 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7022 NEONMAP0(vshll_n_v),
7023 NEONMAP0(vshlq_n_v),
7024 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7025 NEONMAP0(vshr_n_v),
7026 NEONMAP0(vshrn_n_v),
7027 NEONMAP0(vshrq_n_v),
7028 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7029 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7030 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7031 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7032 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7033 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7034 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7035 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7036 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7037 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7038 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7039 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7040 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7041 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7042 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7043 NEONMAP0(vsubhn_v),
7044 NEONMAP0(vtst_v),
7045 NEONMAP0(vtstq_v),
7046 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7047 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7048 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7049 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7050};
7051
7052static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7053 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7054 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7055 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7056 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7057 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7058 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7059 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7060 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7061 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7062 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7063 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7064 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7065 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7066 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7067 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7068 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7069 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7070 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7071 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7072 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7073 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7074 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7075 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7076 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7077 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7078 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7079 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7080 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7081 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7082 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7083 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7084 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7085 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7086 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7087 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7088 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7089 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7090 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7091 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7092 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7093 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7094 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7095 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7096 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7097 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7098 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7099 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7100 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7101 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7102 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7103 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7104 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7105 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7106 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7107 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7108 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7109 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7110 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7111 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7112 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7113 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7114 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7115 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7116 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7117 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7118 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7119 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7120 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7121 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7122 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7123 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7124 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7125 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7126 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7127 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7128 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7129 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7130 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7131 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7132 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7133 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7134 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7135 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7136 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7137 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7138 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7139 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7140 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7141 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7142 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7143 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7144 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7145 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7146 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7147 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7148 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7149 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7150 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7151 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7152 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7153 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7154 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7155 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7156 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7157 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7158 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7159 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7160 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7161 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7162 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7163 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7164 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7165 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7166 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7167 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7168 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7169 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7170 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7171 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7172 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7173 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7174 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7175 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7176 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7177 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7178 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7179 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7180 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7181 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7182 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7183 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7184 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7185 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7186 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7187 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7188 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7189 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7190 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7191 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7192 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7193 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7194 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7195 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7196 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7197 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7198 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7199 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7200 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7201 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7202 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7203 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7204 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7205 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7206 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7207 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7208 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7209 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7210 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7211 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7212 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7213 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7214 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7215 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7216 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7217 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7218 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7219 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7220 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7221 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7222 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7223 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7224 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7225 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7226 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7227 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7228 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7229 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7230 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7231 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7232 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7233 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7234 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7235 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7236 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7237 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7238 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7239 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7240 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7241 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7242 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7243 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7244 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7245 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7246 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7247 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7248 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7249 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7250 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7251 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7252 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7253 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7254 // FP16 scalar intrinsics go here.
7255 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7256 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7257 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7258 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7259 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7260 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7261 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7262 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7263 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7264 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7265 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7266 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7267 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7268 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7269 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7270 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7271 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7272 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7273 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7274 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7275 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7276 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7277 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7278 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7279 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7280 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7281 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7282 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7283 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7284 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7285 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7286 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7287 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7288 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7289};
7290
7291// Some intrinsics are equivalent for codegen.
7292static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7293 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7294 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7295 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7296 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7297 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7298 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7299 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7300 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7301 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7302 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7303 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7304 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7305 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7306 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7307 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7308 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7309 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7310 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7311 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7312 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7313 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7314 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7315 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7316 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7317 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7318 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7319 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7320 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7321 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7322 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7323 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7324 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7325 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7326 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7327 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7328 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7329 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7330 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7331 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7332 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7333 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7334 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7335 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7336 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7337 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7338 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7339 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7340 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7341 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7342 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7343 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7344 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7345 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7346 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7347 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7348 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7349 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7350 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7351 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7352 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7353 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7354 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7355 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7356 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7357 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7358 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7359 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7360 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7361 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7362 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7363 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7364 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7365 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7366 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7367 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7368 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7369 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7370 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7371 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7372 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7373 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7374 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7375 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7376 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7377 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7378 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7379 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7380 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7381 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7382 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7383 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7384 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7385 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7386 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7387 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7388 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7389 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7390 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7391 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7392 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7393 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7394 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7395 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7396 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7397 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7398 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7399 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7400 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7401 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7402 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7403 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7404 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7405 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7406 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7407 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7408 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7409 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7410 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7411 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7412 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7413 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7414 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7415 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7416 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7417 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7418 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7419 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7420 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7421 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7422 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7423 // arbitrary one to be handled as the canonical variation.
7424 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7425 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7426 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7427 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7428 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7429 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7430 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7431 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7432 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7433 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7434 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7435 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7436};
7437
7438#undef NEONMAP0
7439#undef NEONMAP1
7440#undef NEONMAP2
7441
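// SVEMAP1 maps an SVE builtin directly onto an LLVM intrinsic; SVEMAP2
// records no intrinsic (0), leaving that builtin to be handled explicitly
// during SVE codegen.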
7442#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7443 { \
7444 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7445 TypeModifier \
7446 }
7447
7448#define SVEMAP2(NameBase, TypeModifier) \
7449 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7450static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7451#define GET_SVE_LLVM_INTRINSIC_MAP
7452#include "clang/Basic/arm_sve_builtin_cg.inc"
7453#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7454#undef GET_SVE_LLVM_INTRINSIC_MAP
7455};
7456
7457#undef SVEMAP1
7458#undef SVEMAP2
7459
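// SMEMAP1/SMEMAP2 mirror SVEMAP1/SVEMAP2 for the SME builtins.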
7460#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7461 { \
7462 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7463 TypeModifier \
7464 }
7465
7466#define SMEMAP2(NameBase, TypeModifier) \
7467 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7468static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7469#define GET_SME_LLVM_INTRINSIC_MAP
7470#include "clang/Basic/arm_sme_builtin_cg.inc"
7471#undef GET_SME_LLVM_INTRINSIC_MAP
7472};
7473
7474#undef SMEMAP1
7475#undef SMEMAP2
7476
7477static bool NEONSIMDIntrinsicsProvenSorted = false;
7478
7479static bool AArch64SIMDIntrinsicsProvenSorted = false;
7480static bool AArch64SISDIntrinsicsProvenSorted = false;
7481static bool AArch64SVEIntrinsicsProvenSorted = false;
7482static bool AArch64SMEIntrinsicsProvenSorted = false;
7483
7484static const ARMVectorIntrinsicInfo *
7485findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7486 unsigned BuiltinID, bool &MapProvenSorted) {
7487
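 // The map must be sorted by BuiltinID so it can be binary searched; in
 // asserts builds this is verified once per map.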
7488#ifndef NDEBUG
7489 if (!MapProvenSorted) {
7490 assert(llvm::is_sorted(IntrinsicMap));
7491 MapProvenSorted = true;
7492 }
7493#endif
7494
7495 const ARMVectorIntrinsicInfo *Builtin =
7496 llvm::lower_bound(IntrinsicMap, BuiltinID);
7497
7498 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7499 return Builtin;
7500
7501 return nullptr;
7502}
7503
7504Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7505 unsigned Modifier,
7506 llvm::Type *ArgType,
7507 const CallExpr *E) {
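 // Use64BitVectors / Use128BitVectors give the total vector width to use
 // when a scalar return or argument type has to be promoted to a vector
 // type for the underlying intrinsic.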
7508 int VectorSize = 0;
7509 if (Modifier & Use64BitVectors)
7510 VectorSize = 64;
7511 else if (Modifier & Use128BitVectors)
7512 VectorSize = 128;
7513
7514 // Return type.
7515 SmallVector<llvm::Type *, 3> Tys;
7516 if (Modifier & AddRetType) {
7517 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7518 if (Modifier & VectorizeRetType)
7519 Ty = llvm::FixedVectorType::get(
7520 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7521
7522 Tys.push_back(Ty);
7523 }
7524
7525 // Arguments.
7526 if (Modifier & VectorizeArgTypes) {
7527 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7528 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7529 }
7530
7531 if (Modifier & (Add1ArgType | Add2ArgTypes))
7532 Tys.push_back(ArgType);
7533
7534 if (Modifier & Add2ArgTypes)
7535 Tys.push_back(ArgType);
7536
7537 if (Modifier & InventFloatType)
7538 Tys.push_back(FloatTy);
7539
7540 return CGM.getIntrinsic(IntrinsicID, Tys);
7541}
7542
7543static Value *EmitCommonNeonSISDBuiltinExpr(
7544 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7545 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7546 unsigned BuiltinID = SISDInfo.BuiltinID;
7547 unsigned int Int = SISDInfo.LLVMIntrinsic;
7548 unsigned Modifier = SISDInfo.TypeModifier;
7549 const char *s = SISDInfo.NameHint;
7550
7551 switch (BuiltinID) {
7552 case NEON::BI__builtin_neon_vcled_s64:
7553 case NEON::BI__builtin_neon_vcled_u64:
7554 case NEON::BI__builtin_neon_vcles_f32:
7555 case NEON::BI__builtin_neon_vcled_f64:
7556 case NEON::BI__builtin_neon_vcltd_s64:
7557 case NEON::BI__builtin_neon_vcltd_u64:
7558 case NEON::BI__builtin_neon_vclts_f32:
7559 case NEON::BI__builtin_neon_vcltd_f64:
7560 case NEON::BI__builtin_neon_vcales_f32:
7561 case NEON::BI__builtin_neon_vcaled_f64:
7562 case NEON::BI__builtin_neon_vcalts_f32:
7563 case NEON::BI__builtin_neon_vcaltd_f64:
7564 // Only one direction of comparisons actually exists; cmle is actually a cmge
7565 // with swapped operands. The table gives us the right intrinsic but we
7566 // still need to do the swap.
7567 std::swap(Ops[0], Ops[1]);
7568 break;
7569 }
7570
7571 assert(Int && "Generic code assumes a valid intrinsic");
7572
7573 // Determine the type(s) of this overloaded AArch64 intrinsic.
7574 const Expr *Arg = E->getArg(0);
7575 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7576 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7577
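 // Any scalar operand narrower than the intrinsic's corresponding vector
 // parameter is truncated to the element type and inserted into lane 0 of
 // a poison vector.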
7578 int j = 0;
7579 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7580 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7581 ai != ae; ++ai, ++j) {
7582 llvm::Type *ArgTy = ai->getType();
7583 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7584 ArgTy->getPrimitiveSizeInBits())
7585 continue;
7586
7587 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7588 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7589 // it before inserting.
7590 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7591 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7592 Ops[j] =
7593 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7594 }
7595
7596 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7597 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7598 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7599 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7600 return CGF.Builder.CreateExtractElement(Result, C0);
7601
7602 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7603}
7604
7605Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7606 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7607 const char *NameHint, unsigned Modifier, const CallExpr *E,
7608 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7609 llvm::Triple::ArchType Arch) {
7610 // Get the last argument, which specifies the vector type.
7611 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7612 std::optional<llvm::APSInt> NeonTypeConst =
7613 Arg->getIntegerConstantExpr(getContext());
7614 if (!NeonTypeConst)
7615 return nullptr;
7616
7617 // Determine the type of this overloaded NEON intrinsic.
7618 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7619 bool Usgn = Type.isUnsigned();
7620 bool Quad = Type.isQuad();
7621 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7622 const bool AllowBFloatArgsAndRet =
7623 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7624
7625 llvm::FixedVectorType *VTy =
7626 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7627 llvm::Type *Ty = VTy;
7628 if (!Ty)
7629 return nullptr;
7630
7631 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7632 return Builder.getInt32(addr.getAlignment().getQuantity());
7633 };
7634
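 // For builtins with the UnsignedAlts modifier, LLVMIntrinsic names the
 // unsigned variant and AltLLVMIntrinsic the signed one.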
7635 unsigned Int = LLVMIntrinsic;
7636 if ((Modifier & UnsignedAlts) && !Usgn)
7637 Int = AltLLVMIntrinsic;
7638
7639 switch (BuiltinID) {
7640 default: break;
7641 case NEON::BI__builtin_neon_splat_lane_v:
7642 case NEON::BI__builtin_neon_splat_laneq_v:
7643 case NEON::BI__builtin_neon_splatq_lane_v:
7644 case NEON::BI__builtin_neon_splatq_laneq_v: {
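 // splatq_lane produces twice as many lanes as its 64-bit input;
 // splat_laneq produces half as many lanes as its 128-bit input.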
7645 auto NumElements = VTy->getElementCount();
7646 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7647 NumElements = NumElements * 2;
7648 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7649 NumElements = NumElements.divideCoefficientBy(2);
7650
7651 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7652 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7653 }
7654 case NEON::BI__builtin_neon_vpadd_v:
7655 case NEON::BI__builtin_neon_vpaddq_v:
7656 // We don't allow fp/int overloading of intrinsics.
7657 if (VTy->getElementType()->isFloatingPointTy() &&
7658 Int == Intrinsic::aarch64_neon_addp)
7659 Int = Intrinsic::aarch64_neon_faddp;
7660 break;
7661 case NEON::BI__builtin_neon_vabs_v:
7662 case NEON::BI__builtin_neon_vabsq_v:
7663 if (VTy->getElementType()->isFloatingPointTy())
7664 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7665 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7666 case NEON::BI__builtin_neon_vadd_v:
7667 case NEON::BI__builtin_neon_vaddq_v: {
7668 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7669 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7670 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7671 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7672 return Builder.CreateBitCast(Ops[0], Ty);
7673 }
7674 case NEON::BI__builtin_neon_vaddhn_v: {
7675 llvm::FixedVectorType *SrcTy =
7676 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7677
7678 // %sum = add <4 x i32> %lhs, %rhs
7679 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7680 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7681 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7682
7683 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7684 Constant *ShiftAmt =
7685 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7686 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7687
7688 // %res = trunc <4 x i32> %high to <4 x i16>
7689 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7690 }
7691 case NEON::BI__builtin_neon_vcale_v:
7692 case NEON::BI__builtin_neon_vcaleq_v:
7693 case NEON::BI__builtin_neon_vcalt_v:
7694 case NEON::BI__builtin_neon_vcaltq_v:
7695 std::swap(Ops[0], Ops[1]);
7696 [[fallthrough]];
7697 case NEON::BI__builtin_neon_vcage_v:
7698 case NEON::BI__builtin_neon_vcageq_v:
7699 case NEON::BI__builtin_neon_vcagt_v:
7700 case NEON::BI__builtin_neon_vcagtq_v: {
7701 llvm::Type *Ty;
7702 switch (VTy->getScalarSizeInBits()) {
7703 default: llvm_unreachable("unexpected type");
7704 case 32:
7705 Ty = FloatTy;
7706 break;
7707 case 64:
7708 Ty = DoubleTy;
7709 break;
7710 case 16:
7711 Ty = HalfTy;
7712 break;
7713 }
7714 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7715 llvm::Type *Tys[] = { VTy, VecFlt };
7716 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7717 return EmitNeonCall(F, Ops, NameHint);
7718 }
7719 case NEON::BI__builtin_neon_vceqz_v:
7720 case NEON::BI__builtin_neon_vceqzq_v:
7721 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7722 ICmpInst::ICMP_EQ, "vceqz");
7723 case NEON::BI__builtin_neon_vcgez_v:
7724 case NEON::BI__builtin_neon_vcgezq_v:
7725 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7726 ICmpInst::ICMP_SGE, "vcgez");
7727 case NEON::BI__builtin_neon_vclez_v:
7728 case NEON::BI__builtin_neon_vclezq_v:
7729 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7730 ICmpInst::ICMP_SLE, "vclez");
7731 case NEON::BI__builtin_neon_vcgtz_v:
7732 case NEON::BI__builtin_neon_vcgtzq_v:
7733 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7734 ICmpInst::ICMP_SGT, "vcgtz");
7735 case NEON::BI__builtin_neon_vcltz_v:
7736 case NEON::BI__builtin_neon_vcltzq_v:
7737 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7738 ICmpInst::ICMP_SLT, "vcltz");
7739 case NEON::BI__builtin_neon_vclz_v:
7740 case NEON::BI__builtin_neon_vclzq_v:
7741 // We generate a target-independent intrinsic, which needs a second argument
7742 // for whether or not clz of zero is undefined; on ARM it isn't.
7743 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7744 break;
7745 case NEON::BI__builtin_neon_vcvt_f32_v:
7746 case NEON::BI__builtin_neon_vcvtq_f32_v:
7747 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7748 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7749 HasLegalHalfType);
7750 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7751 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7752 case NEON::BI__builtin_neon_vcvt_f16_s16:
7753 case NEON::BI__builtin_neon_vcvt_f16_u16:
7754 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7755 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7756 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7757 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7758 HasLegalHalfType);
7759 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7760 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7761 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7762 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7763 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7764 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7765 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7766 Function *F = CGM.getIntrinsic(Int, Tys);
7767 return EmitNeonCall(F, Ops, "vcvt_n");
7768 }
7769 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7770 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7771 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7772 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7773 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7774 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7775 Function *F = CGM.getIntrinsic(Int, Tys);
7776 return EmitNeonCall(F, Ops, "vcvt_n");
7777 }
7778 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7779 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7780 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7781 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7782 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7783 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7784 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7785 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7786 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7787 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7788 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7789 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7790 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7791 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7792 return EmitNeonCall(F, Ops, "vcvt_n");
7793 }
7794 case NEON::BI__builtin_neon_vcvt_s32_v:
7795 case NEON::BI__builtin_neon_vcvt_u32_v:
7796 case NEON::BI__builtin_neon_vcvt_s64_v:
7797 case NEON::BI__builtin_neon_vcvt_u64_v:
7798 case NEON::BI__builtin_neon_vcvt_s16_f16:
7799 case NEON::BI__builtin_neon_vcvt_u16_f16:
7800 case NEON::BI__builtin_neon_vcvtq_s32_v:
7801 case NEON::BI__builtin_neon_vcvtq_u32_v:
7802 case NEON::BI__builtin_neon_vcvtq_s64_v:
7803 case NEON::BI__builtin_neon_vcvtq_u64_v:
7804 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7805 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7806 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7807 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7808 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7809 }
7810 case NEON::BI__builtin_neon_vcvta_s16_f16:
7811 case NEON::BI__builtin_neon_vcvta_s32_v:
7812 case NEON::BI__builtin_neon_vcvta_s64_v:
7813 case NEON::BI__builtin_neon_vcvta_u16_f16:
7814 case NEON::BI__builtin_neon_vcvta_u32_v:
7815 case NEON::BI__builtin_neon_vcvta_u64_v:
7816 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7817 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7818 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7819 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7820 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7821 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7822 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7823 case NEON::BI__builtin_neon_vcvtn_s32_v:
7824 case NEON::BI__builtin_neon_vcvtn_s64_v:
7825 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7826 case NEON::BI__builtin_neon_vcvtn_u32_v:
7827 case NEON::BI__builtin_neon_vcvtn_u64_v:
7828 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7829 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7830 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7831 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7832 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7833 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7834 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7835 case NEON::BI__builtin_neon_vcvtp_s32_v:
7836 case NEON::BI__builtin_neon_vcvtp_s64_v:
7837 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7838 case NEON::BI__builtin_neon_vcvtp_u32_v:
7839 case NEON::BI__builtin_neon_vcvtp_u64_v:
7840 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7841 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7842 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7843 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7844 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7845 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7846 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7847 case NEON::BI__builtin_neon_vcvtm_s32_v:
7848 case NEON::BI__builtin_neon_vcvtm_s64_v:
7849 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7850 case NEON::BI__builtin_neon_vcvtm_u32_v:
7851 case NEON::BI__builtin_neon_vcvtm_u64_v:
7852 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7853 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7854 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7855 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7856 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7857 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7858 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7859 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7860 }
7861 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7862 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7863 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7864
7865 }
7866 case NEON::BI__builtin_neon_vext_v:
7867 case NEON::BI__builtin_neon_vextq_v: {
7868 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7869 SmallVector<int, 16> Indices;
7870 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7871 Indices.push_back(i+CV);
7872
7873 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7874 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7875 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7876 }
7877 case NEON::BI__builtin_neon_vfma_v:
7878 case NEON::BI__builtin_neon_vfmaq_v: {
7879 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7880 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7881 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7882
7883 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7884 return emitCallMaybeConstrainedFPBuiltin(
7885 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7886 {Ops[1], Ops[2], Ops[0]});
7887 }
7888 case NEON::BI__builtin_neon_vld1_v:
7889 case NEON::BI__builtin_neon_vld1q_v: {
7890 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7891 Ops.push_back(getAlignmentValue32(PtrOp0));
7892 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7893 }
7894 case NEON::BI__builtin_neon_vld1_x2_v:
7895 case NEON::BI__builtin_neon_vld1q_x2_v:
7896 case NEON::BI__builtin_neon_vld1_x3_v:
7897 case NEON::BI__builtin_neon_vld1q_x3_v:
7898 case NEON::BI__builtin_neon_vld1_x4_v:
7899 case NEON::BI__builtin_neon_vld1q_x4_v: {
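 // The vld1xN intrinsics return a struct of N vectors, which is stored to
 // the destination pointer passed in Ops[0].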
7900 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7901 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7902 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7903 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7904 }
7905 case NEON::BI__builtin_neon_vld2_v:
7906 case NEON::BI__builtin_neon_vld2q_v:
7907 case NEON::BI__builtin_neon_vld3_v:
7908 case NEON::BI__builtin_neon_vld3q_v:
7909 case NEON::BI__builtin_neon_vld4_v:
7910 case NEON::BI__builtin_neon_vld4q_v:
7911 case NEON::BI__builtin_neon_vld2_dup_v:
7912 case NEON::BI__builtin_neon_vld2q_dup_v:
7913 case NEON::BI__builtin_neon_vld3_dup_v:
7914 case NEON::BI__builtin_neon_vld3q_dup_v:
7915 case NEON::BI__builtin_neon_vld4_dup_v:
7916 case NEON::BI__builtin_neon_vld4q_dup_v: {
7917 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7918 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7919 Value *Align = getAlignmentValue32(PtrOp1);
7920 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7921 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7922 }
7923 case NEON::BI__builtin_neon_vld1_dup_v:
7924 case NEON::BI__builtin_neon_vld1q_dup_v: {
7925 Value *V = PoisonValue::get(Ty);
7926 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7927 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7928 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7929 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7930 return EmitNeonSplat(Ops[0], CI);
7931 }
7932 case NEON::BI__builtin_neon_vld2_lane_v:
7933 case NEON::BI__builtin_neon_vld2q_lane_v:
7934 case NEON::BI__builtin_neon_vld3_lane_v:
7935 case NEON::BI__builtin_neon_vld3q_lane_v:
7936 case NEON::BI__builtin_neon_vld4_lane_v:
7937 case NEON::BI__builtin_neon_vld4q_lane_v: {
7938 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7939 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7940 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7941 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7942 Ops.push_back(getAlignmentValue32(PtrOp1));
7943 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7944 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7945 }
7946 case NEON::BI__builtin_neon_vmovl_v: {
7947 llvm::FixedVectorType *DTy =
7948 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7949 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7950 if (Usgn)
7951 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
7952 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
7953 }
7954 case NEON::BI__builtin_neon_vmovn_v: {
7955 llvm::FixedVectorType *QTy =
7956 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7957 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
7958 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
7959 }
7960 case NEON::BI__builtin_neon_vmull_v:
7961 // FIXME: the integer vmull operations could be emitted in terms of pure
7962 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7963 // hoisting the exts outside loops. Until global ISel comes along that can
7964 // see through such movement, this leads to bad CodeGen. So we need an
7965 // intrinsic for now.
7966 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7967 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7968 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
7969 case NEON::BI__builtin_neon_vpadal_v:
7970 case NEON::BI__builtin_neon_vpadalq_v: {
7971 // The source operand type has twice as many elements of half the size.
7972 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7973 llvm::Type *EltTy =
7974 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7975 auto *NarrowTy =
7976 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7977 llvm::Type *Tys[2] = { Ty, NarrowTy };
7978 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7979 }
7980 case NEON::BI__builtin_neon_vpaddl_v:
7981 case NEON::BI__builtin_neon_vpaddlq_v: {
7982 // The source operand type has twice as many elements of half the size.
7983 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7984 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7985 auto *NarrowTy =
7986 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7987 llvm::Type *Tys[2] = { Ty, NarrowTy };
7988 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
7989 }
7990 case NEON::BI__builtin_neon_vqdmlal_v:
7991 case NEON::BI__builtin_neon_vqdmlsl_v: {
7992 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7993 Ops[1] =
7994 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
7995 Ops.resize(2);
7996 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
7997 }
7998 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7999 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8000 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8001 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8002 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8003 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8004 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8005 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8006 RTy->getNumElements() * 2);
8007 llvm::Type *Tys[2] = {
8008 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8009 /*isQuad*/ false))};
8010 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8011 }
8012 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8013 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8014 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8015 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8016 llvm::Type *Tys[2] = {
8017 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8018 /*isQuad*/ true))};
8019 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8020 }
8021 case NEON::BI__builtin_neon_vqshl_n_v:
8022 case NEON::BI__builtin_neon_vqshlq_n_v:
8023 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8024 1, false);
8025 case NEON::BI__builtin_neon_vqshlu_n_v:
8026 case NEON::BI__builtin_neon_vqshluq_n_v:
8027 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8028 1, false);
8029 case NEON::BI__builtin_neon_vrecpe_v:
8030 case NEON::BI__builtin_neon_vrecpeq_v:
8031 case NEON::BI__builtin_neon_vrsqrte_v:
8032 case NEON::BI__builtin_neon_vrsqrteq_v:
8033 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8034 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8035 case NEON::BI__builtin_neon_vrndi_v:
8036 case NEON::BI__builtin_neon_vrndiq_v:
8037 Int = Builder.getIsFPConstrained()
8038 ? Intrinsic::experimental_constrained_nearbyint
8039 : Intrinsic::nearbyint;
8040 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8041 case NEON::BI__builtin_neon_vrshr_n_v:
8042 case NEON::BI__builtin_neon_vrshrq_n_v:
8043 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8044 1, true);
8045 case NEON::BI__builtin_neon_vsha512hq_u64:
8046 case NEON::BI__builtin_neon_vsha512h2q_u64:
8047 case NEON::BI__builtin_neon_vsha512su0q_u64:
8048 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8049 Function *F = CGM.getIntrinsic(Int);
8050 return EmitNeonCall(F, Ops, "");
8051 }
8052 case NEON::BI__builtin_neon_vshl_n_v:
8053 case NEON::BI__builtin_neon_vshlq_n_v:
8054 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8055 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8056 "vshl_n");
8057 case NEON::BI__builtin_neon_vshll_n_v: {
8058 llvm::FixedVectorType *SrcTy =
8059 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8060 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8061 if (Usgn)
8062 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8063 else
8064 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8065 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8066 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8067 }
8068 case NEON::BI__builtin_neon_vshrn_n_v: {
8069 llvm::FixedVectorType *SrcTy =
8070 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8071 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8072 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8073 if (Usgn)
8074 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8075 else
8076 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8077 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8078 }
8079 case NEON::BI__builtin_neon_vshr_n_v:
8080 case NEON::BI__builtin_neon_vshrq_n_v:
8081 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8082 case NEON::BI__builtin_neon_vst1_v:
8083 case NEON::BI__builtin_neon_vst1q_v:
8084 case NEON::BI__builtin_neon_vst2_v:
8085 case NEON::BI__builtin_neon_vst2q_v:
8086 case NEON::BI__builtin_neon_vst3_v:
8087 case NEON::BI__builtin_neon_vst3q_v:
8088 case NEON::BI__builtin_neon_vst4_v:
8089 case NEON::BI__builtin_neon_vst4q_v:
8090 case NEON::BI__builtin_neon_vst2_lane_v:
8091 case NEON::BI__builtin_neon_vst2q_lane_v:
8092 case NEON::BI__builtin_neon_vst3_lane_v:
8093 case NEON::BI__builtin_neon_vst3q_lane_v:
8094 case NEON::BI__builtin_neon_vst4_lane_v:
8095 case NEON::BI__builtin_neon_vst4q_lane_v: {
8096 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8097 Ops.push_back(getAlignmentValue32(PtrOp0));
8098 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8099 }
8100 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8101 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8102 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8103 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8104 case NEON::BI__builtin_neon_vsm4eq_u32: {
8105 Function *F = CGM.getIntrinsic(Int);
8106 return EmitNeonCall(F, Ops, "");
8107 }
8108 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8109 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8110 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8111 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8112 Function *F = CGM.getIntrinsic(Int);
8113 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8114 return EmitNeonCall(F, Ops, "");
8115 }
8116 case NEON::BI__builtin_neon_vst1_x2_v:
8117 case NEON::BI__builtin_neon_vst1q_x2_v:
8118 case NEON::BI__builtin_neon_vst1_x3_v:
8119 case NEON::BI__builtin_neon_vst1q_x3_v:
8120 case NEON::BI__builtin_neon_vst1_x4_v:
8121 case NEON::BI__builtin_neon_vst1q_x4_v: {
8122 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8123 // in AArch64 it comes last. We may want to stick to one or the other.
8124 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8125 Arch == llvm::Triple::aarch64_32) {
8126 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8127 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8128 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8129 }
8130 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8131 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8132 }
8133 case NEON::BI__builtin_neon_vsubhn_v: {
8134 llvm::FixedVectorType *SrcTy =
8135 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8136
8137 // %diff = sub <4 x i32> %lhs, %rhs
8138 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8139 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8140 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8141
8142 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8143 Constant *ShiftAmt =
8144 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8145 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8146
8147 // %res = trunc <4 x i32> %high to <4 x i16>
8148 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8149 }
8150 case NEON::BI__builtin_neon_vtrn_v:
8151 case NEON::BI__builtin_neon_vtrnq_v: {
8152 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8153 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8154 Value *SV = nullptr;
8155
8156 for (unsigned vi = 0; vi != 2; ++vi) {
8157 SmallVector<int, 16> Indices;
8158 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8159 Indices.push_back(i+vi);
8160 Indices.push_back(i+e+vi);
8161 }
8162 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8163 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8164 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8165 }
8166 return SV;
8167 }
8168 case NEON::BI__builtin_neon_vtst_v:
8169 case NEON::BI__builtin_neon_vtstq_v: {
8170 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8171 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8172 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8173 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8174 ConstantAggregateZero::get(Ty));
8175 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8176 }
8177 case NEON::BI__builtin_neon_vuzp_v:
8178 case NEON::BI__builtin_neon_vuzpq_v: {
8179 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8180 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8181 Value *SV = nullptr;
8182
8183 for (unsigned vi = 0; vi != 2; ++vi) {
8184 SmallVector<int, 16> Indices;
8185 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8186 Indices.push_back(2*i+vi);
8187
8188 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8189 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8190 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8191 }
8192 return SV;
8193 }
8194 case NEON::BI__builtin_neon_vxarq_u64: {
8195 Function *F = CGM.getIntrinsic(Int);
8196 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8197 return EmitNeonCall(F, Ops, "");
8198 }
8199 case NEON::BI__builtin_neon_vzip_v:
8200 case NEON::BI__builtin_neon_vzipq_v: {
8201 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8202 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8203 Value *SV = nullptr;
8204
8205 for (unsigned vi = 0; vi != 2; ++vi) {
8206 SmallVector<int, 16> Indices;
8207 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8208 Indices.push_back((i + vi*e) >> 1);
8209 Indices.push_back(((i + vi*e) >> 1)+e);
8210 }
8211 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8212 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8213 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8214 }
8215 return SV;
8216 }
8217 case NEON::BI__builtin_neon_vdot_s32:
8218 case NEON::BI__builtin_neon_vdot_u32:
8219 case NEON::BI__builtin_neon_vdotq_s32:
8220 case NEON::BI__builtin_neon_vdotq_u32: {
8221 auto *InputTy =
8222 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8223 llvm::Type *Tys[2] = { Ty, InputTy };
8224 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8225 }
8226 case NEON::BI__builtin_neon_vfmlal_low_f16:
8227 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8228 auto *InputTy =
8229 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8230 llvm::Type *Tys[2] = { Ty, InputTy };
8231 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8232 }
8233 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8234 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8235 auto *InputTy =
8236 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8237 llvm::Type *Tys[2] = { Ty, InputTy };
8238 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8239 }
8240 case NEON::BI__builtin_neon_vfmlal_high_f16:
8241 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8242 auto *InputTy =
8243 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8244 llvm::Type *Tys[2] = { Ty, InputTy };
8245 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8246 }
8247 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8248 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8249 auto *InputTy =
8250 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8251 llvm::Type *Tys[2] = { Ty, InputTy };
8252 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8253 }
8254 case NEON::BI__builtin_neon_vmmlaq_s32:
8255 case NEON::BI__builtin_neon_vmmlaq_u32: {
8256 auto *InputTy =
8257 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8258 llvm::Type *Tys[2] = { Ty, InputTy };
8259 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8260 }
8261 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8262 auto *InputTy =
8263 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8264 llvm::Type *Tys[2] = { Ty, InputTy };
8265 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8266 }
8267 case NEON::BI__builtin_neon_vusdot_s32:
8268 case NEON::BI__builtin_neon_vusdotq_s32: {
8269 auto *InputTy =
8270 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8271 llvm::Type *Tys[2] = { Ty, InputTy };
8272 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8273 }
8274 case NEON::BI__builtin_neon_vbfdot_f32:
8275 case NEON::BI__builtin_neon_vbfdotq_f32: {
8276 llvm::Type *InputTy =
8277 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8278 llvm::Type *Tys[2] = { Ty, InputTy };
8279 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8280 }
8281 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8282 llvm::Type *Tys[1] = { Ty };
8283 Function *F = CGM.getIntrinsic(Int, Tys);
8284 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8285 }
8286
8287 }
8288
8289 assert(Int && "Expected valid intrinsic number");
8290
8291 // Determine the type(s) of this overloaded AArch64 intrinsic.
8292 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8293
8294 Value *Result = EmitNeonCall(F, Ops, NameHint);
8295 llvm::Type *ResultType = ConvertType(E->getType());
8296  // Cast the AArch64 intrinsic's one-element vector result to the scalar
8297  // type expected by the builtin.
8298 return Builder.CreateBitCast(Result, ResultType, NameHint);
8299}
8300
8301Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8302 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8303 const CmpInst::Predicate Ip, const Twine &Name) {
8304 llvm::Type *OTy = Op->getType();
8305
8306 // FIXME: this is utterly horrific. We should not be looking at previous
8307 // codegen context to find out what needs doing. Unfortunately TableGen
8308 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8309 // (etc).
8310 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8311 OTy = BI->getOperand(0)->getType();
8312
8313 Op = Builder.CreateBitCast(Op, OTy);
8314 if (OTy->getScalarType()->isFloatingPointTy()) {
8315 if (Fp == CmpInst::FCMP_OEQ)
8316 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8317 else
8318 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8319 } else {
8320 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8321 }
8322 return Builder.CreateSExt(Op, Ty, Name);
8323}
8324
8325static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8326 Value *ExtOp, Value *IndexOp,
8327 llvm::Type *ResTy, unsigned IntID,
8328 const char *Name) {
8329  SmallVector<Value *, 16> TblOps;
8330 if (ExtOp)
8331 TblOps.push_back(ExtOp);
8332
8333  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8334 SmallVector<int, 16> Indices;
8335 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8336 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8337 Indices.push_back(2*i);
8338 Indices.push_back(2*i+1);
8339 }
8340
8341 int PairPos = 0, End = Ops.size() - 1;
8342 while (PairPos < End) {
8343 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8344 Ops[PairPos+1], Indices,
8345 Name));
8346 PairPos += 2;
8347 }
8348
8349  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8350  // of the last 128-bit lookup table with zero.
8351 if (PairPos == End) {
8352 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8353 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8354 ZeroTbl, Indices, Name));
8355 }
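  // To illustrate the pairing above (values are arbitrary, for exposition
  // only): given three 64-bit tables {T0, T1, T2}, TblOps now holds two
  // 128-bit tables, concat(T0, T1) and concat(T2, zero); IndexOp is appended
  // next and the TBL/TBX intrinsic is called on the packed list.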
8356
8357 Function *TblF;
8358 TblOps.push_back(IndexOp);
8359 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8360
8361 return CGF.EmitNeonCall(TblF, TblOps, Name);
8362}
8363
8364Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8365 unsigned Value;
8366 switch (BuiltinID) {
8367 default:
8368 return nullptr;
8369 case clang::ARM::BI__builtin_arm_nop:
8370 Value = 0;
8371 break;
8372 case clang::ARM::BI__builtin_arm_yield:
8373 case clang::ARM::BI__yield:
8374 Value = 1;
8375 break;
8376 case clang::ARM::BI__builtin_arm_wfe:
8377 case clang::ARM::BI__wfe:
8378 Value = 2;
8379 break;
8380 case clang::ARM::BI__builtin_arm_wfi:
8381 case clang::ARM::BI__wfi:
8382 Value = 3;
8383 break;
8384 case clang::ARM::BI__builtin_arm_sev:
8385 case clang::ARM::BI__sev:
8386 Value = 4;
8387 break;
8388 case clang::ARM::BI__builtin_arm_sevl:
8389 case clang::ARM::BI__sevl:
8390 Value = 5;
8391 break;
8392 }
8393
8394 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8395 llvm::ConstantInt::get(Int32Ty, Value));
8396}
8397
8398enum SpecialRegisterAccessKind {
8399  NormalRead,
8400  VolatileRead,
8401  Write,
8402};
8403
8404// Generates the IR for __builtin_read_exec_*.
8405// Lowers the builtin to amdgcn_ballot intrinsic.
8406static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8407 llvm::Type *RegisterType,
8408 llvm::Type *ValueType, bool isExecHi) {
8409 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8410 CodeGen::CodeGenModule &CGM = CGF.CGM;
8411
8412 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8413 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8414
8415 if (isExecHi) {
8416 Value *Rt2 = Builder.CreateLShr(Call, 32);
8417 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8418 return Rt2;
8419 }
8420
8421 return Call;
8422}
8423
8424// Generates the IR for the read/write special register builtins.
8425// ValueType is the type of the value that is to be written or read, and
8426// RegisterType is the type of the register being written to or read from.
8427static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8428 const CallExpr *E,
8429 llvm::Type *RegisterType,
8430 llvm::Type *ValueType,
8431 SpecialRegisterAccessKind AccessKind,
8432 StringRef SysReg = "") {
8433  // The read/write register intrinsics only support 32-, 64- and 128-bit registers.
8434 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8435 RegisterType->isIntegerTy(128)) &&
8436 "Unsupported size for register.");
8437
8438 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8439 CodeGen::CodeGenModule &CGM = CGF.CGM;
8440 LLVMContext &Context = CGM.getLLVMContext();
8441
8442 if (SysReg.empty()) {
8443 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8444 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8445 }
8446
8447 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8448 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8449 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8450
8451 llvm::Type *Types[] = { RegisterType };
8452
8453 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8454 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8455 && "Can't fit 64-bit value in 32-bit register");
8456
8457 if (AccessKind != Write) {
8458 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8459 llvm::Function *F = CGM.getIntrinsic(
8460 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8461 : llvm::Intrinsic::read_register,
8462 Types);
8463 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8464
8465 if (MixedTypes)
8466 // Read into 64 bit register and then truncate result to 32 bit.
8467 return Builder.CreateTrunc(Call, ValueType);
8468
8469 if (ValueType->isPointerTy())
8470 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8471 return Builder.CreateIntToPtr(Call, ValueType);
8472
8473 return Call;
8474 }
8475
8476 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8477 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8478 if (MixedTypes) {
8479 // Extend 32 bit write value to 64 bit to pass to write.
8480 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8481 return Builder.CreateCall(F, { Metadata, ArgValue });
8482 }
8483
8484 if (ValueType->isPointerTy()) {
8485 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8486 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8487 return Builder.CreateCall(F, { Metadata, ArgValue });
8488 }
8489
8490 return Builder.CreateCall(F, { Metadata, ArgValue });
8491}
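// Illustrative note (not from the original source; the exact textual IR is an
// assumption): with the lowering above, a 32-bit volatile read such as a
// hypothetical __builtin_arm_rsr("cpsr") becomes roughly
//   %v = call i32 @llvm.read_volatile_register.i32(metadata !{!"cpsr"})
// and the pointer and 64-bit variants differ only in the inttoptr/trunc/zext
// glue emitted around the intrinsic call.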
8492
8493/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8494/// argument that specifies the vector type.
8495static bool HasExtraNeonArgument(unsigned BuiltinID) {
8496 switch (BuiltinID) {
8497 default: break;
8498 case NEON::BI__builtin_neon_vget_lane_i8:
8499 case NEON::BI__builtin_neon_vget_lane_i16:
8500 case NEON::BI__builtin_neon_vget_lane_bf16:
8501 case NEON::BI__builtin_neon_vget_lane_i32:
8502 case NEON::BI__builtin_neon_vget_lane_i64:
8503 case NEON::BI__builtin_neon_vget_lane_f32:
8504 case NEON::BI__builtin_neon_vgetq_lane_i8:
8505 case NEON::BI__builtin_neon_vgetq_lane_i16:
8506 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8507 case NEON::BI__builtin_neon_vgetq_lane_i32:
8508 case NEON::BI__builtin_neon_vgetq_lane_i64:
8509 case NEON::BI__builtin_neon_vgetq_lane_f32:
8510 case NEON::BI__builtin_neon_vduph_lane_bf16:
8511 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8512 case NEON::BI__builtin_neon_vset_lane_i8:
8513 case NEON::BI__builtin_neon_vset_lane_i16:
8514 case NEON::BI__builtin_neon_vset_lane_bf16:
8515 case NEON::BI__builtin_neon_vset_lane_i32:
8516 case NEON::BI__builtin_neon_vset_lane_i64:
8517 case NEON::BI__builtin_neon_vset_lane_f32:
8518 case NEON::BI__builtin_neon_vsetq_lane_i8:
8519 case NEON::BI__builtin_neon_vsetq_lane_i16:
8520 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8521 case NEON::BI__builtin_neon_vsetq_lane_i32:
8522 case NEON::BI__builtin_neon_vsetq_lane_i64:
8523 case NEON::BI__builtin_neon_vsetq_lane_f32:
8524 case NEON::BI__builtin_neon_vsha1h_u32:
8525 case NEON::BI__builtin_neon_vsha1cq_u32:
8526 case NEON::BI__builtin_neon_vsha1pq_u32:
8527 case NEON::BI__builtin_neon_vsha1mq_u32:
8528 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8529 case clang::ARM::BI_MoveToCoprocessor:
8530 case clang::ARM::BI_MoveToCoprocessor2:
8531 return false;
8532 }
8533 return true;
8534}
8535
8536Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8537 const CallExpr *E,
8538 ReturnValueSlot ReturnValue,
8539 llvm::Triple::ArchType Arch) {
8540 if (auto Hint = GetValueForARMHint(BuiltinID))
8541 return Hint;
8542
8543 if (BuiltinID == clang::ARM::BI__emit) {
8544 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8545 llvm::FunctionType *FTy =
8546 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8547
8548    Expr::EvalResult Result;
8549 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8550 llvm_unreachable("Sema will ensure that the parameter is constant");
8551
8552 llvm::APSInt Value = Result.Val.getInt();
8553 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8554
8555 llvm::InlineAsm *Emit =
8556 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8557 /*hasSideEffects=*/true)
8558 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8559 /*hasSideEffects=*/true);
8560
8561 return Builder.CreateCall(Emit);
8562 }
8563
8564 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8565 Value *Option = EmitScalarExpr(E->getArg(0));
8566 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8567 }
8568
8569 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8570    Value *Address = EmitScalarExpr(E->getArg(0));
8571 Value *RW = EmitScalarExpr(E->getArg(1));
8572 Value *IsData = EmitScalarExpr(E->getArg(2));
8573
8574    // Locality is not supported on the ARM target.
8575 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8576
8577 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8578 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8579 }
8580
8581 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8582 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8583 return Builder.CreateCall(
8584 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8585 }
8586
8587 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8588 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8589 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8590 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8591 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8592 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8593 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8594 return Res;
8595 }
8596
8597
8598 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8599 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8600 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8601 }
8602 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8603 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8604 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8605 "cls");
8606 }
8607
8608 if (BuiltinID == clang::ARM::BI__clear_cache) {
8609 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8610 const FunctionDecl *FD = E->getDirectCallee();
8611 Value *Ops[2];
8612 for (unsigned i = 0; i < 2; i++)
8613 Ops[i] = EmitScalarExpr(E->getArg(i));
8614 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8615 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8616 StringRef Name = FD->getName();
8617 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8618 }
8619
8620 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8621 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8622 Function *F;
8623
8624 switch (BuiltinID) {
8625 default: llvm_unreachable("unexpected builtin");
8626 case clang::ARM::BI__builtin_arm_mcrr:
8627 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8628 break;
8629 case clang::ARM::BI__builtin_arm_mcrr2:
8630 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8631 break;
8632 }
8633
8634    // The MCRR{2} instruction has 5 operands, but the builtin has only 4,
8635    // because Rt and Rt2 are represented as a single unsigned 64-bit
8636    // integer in the builtin's definition, whereas the underlying LLVM
8637    // intrinsic takes them as two separate 32-bit values. The 64-bit
8638    // argument is therefore split into its low and high halves below
8639    // before the intrinsic call is emitted.
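    // Worked example (arbitrary value, for exposition): if RtAndRt2 is
    // 0x1111111122222222, then Rt (the truncated low half) is 0x22222222 and
    // Rt2 (the high half, after the shift by 32) is 0x11111111.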
8640
8641 Value *Coproc = EmitScalarExpr(E->getArg(0));
8642 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8643 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8644 Value *CRm = EmitScalarExpr(E->getArg(3));
8645
8646 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8647 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8648 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8649 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8650
8651 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8652 }
8653
8654 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8655 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8656 Function *F;
8657
8658 switch (BuiltinID) {
8659 default: llvm_unreachable("unexpected builtin");
8660 case clang::ARM::BI__builtin_arm_mrrc:
8661 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8662 break;
8663 case clang::ARM::BI__builtin_arm_mrrc2:
8664 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8665 break;
8666 }
8667
8668 Value *Coproc = EmitScalarExpr(E->getArg(0));
8669 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8670 Value *CRm = EmitScalarExpr(E->getArg(2));
8671 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8672
8673 // Returns an unsigned 64 bit integer, represented
8674 // as two 32 bit integers.
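    // For example (arbitrary values): if the two extracted halves are
    // 0x22222222 (element 0, the low word) and 0x11111111 (element 1, the
    // high word), the reassembled 64-bit result below is 0x1111111122222222.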
8675
8676 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8677 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8678 Rt = Builder.CreateZExt(Rt, Int64Ty);
8679 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8680
8681 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8682 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8683 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8684
8685 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8686 }
8687
8688 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8689 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8690 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8691 getContext().getTypeSize(E->getType()) == 64) ||
8692 BuiltinID == clang::ARM::BI__ldrexd) {
8693 Function *F;
8694
8695 switch (BuiltinID) {
8696 default: llvm_unreachable("unexpected builtin");
8697 case clang::ARM::BI__builtin_arm_ldaex:
8698 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8699 break;
8700 case clang::ARM::BI__builtin_arm_ldrexd:
8701 case clang::ARM::BI__builtin_arm_ldrex:
8702 case clang::ARM::BI__ldrexd:
8703 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8704 break;
8705 }
8706
8707 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8708 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8709
8710 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8711 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8712 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8713 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8714
8715 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8716 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8717 Val = Builder.CreateOr(Val, Val1);
8718 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8719 }
8720
8721 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8722 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8723 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8724
8725 QualType Ty = E->getType();
8726 llvm::Type *RealResTy = ConvertType(Ty);
8727 llvm::Type *IntTy =
8728 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8729
8730    Function *F = CGM.getIntrinsic(
8731 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8732 : Intrinsic::arm_ldrex,
8733 UnqualPtrTy);
8734 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8735 Val->addParamAttr(
8736 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8737
8738 if (RealResTy->isPointerTy())
8739 return Builder.CreateIntToPtr(Val, RealResTy);
8740 else {
8741 llvm::Type *IntResTy = llvm::IntegerType::get(
8742 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8743 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8744 RealResTy);
8745 }
8746 }
8747
8748 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8749 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8750 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8751 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8752    Function *F = CGM.getIntrinsic(
8753 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8754 : Intrinsic::arm_strexd);
8755 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8756
8757 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8758 Value *Val = EmitScalarExpr(E->getArg(0));
8759 Builder.CreateStore(Val, Tmp);
8760
8761 Address LdPtr = Tmp.withElementType(STy);
8762 Val = Builder.CreateLoad(LdPtr);
8763
8764 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8765 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8766 Value *StPtr = EmitScalarExpr(E->getArg(1));
8767 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8768 }
8769
8770 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8771 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8772 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8773 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8774
8775 QualType Ty = E->getArg(0)->getType();
8776 llvm::Type *StoreTy =
8777 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8778
8779 if (StoreVal->getType()->isPointerTy())
8780 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8781 else {
8782 llvm::Type *IntTy = llvm::IntegerType::get(
8783          getLLVMContext(),
8784 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8785 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8786 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8787 }
8788
8789    Function *F = CGM.getIntrinsic(
8790 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8791 : Intrinsic::arm_strex,
8792 StoreAddr->getType());
8793
8794 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8795 CI->addParamAttr(
8796 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8797 return CI;
8798 }
8799
8800 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8801 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8802 return Builder.CreateCall(F);
8803 }
8804
8805 // CRC32
8806 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8807 switch (BuiltinID) {
8808 case clang::ARM::BI__builtin_arm_crc32b:
8809 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8810 case clang::ARM::BI__builtin_arm_crc32cb:
8811 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8812 case clang::ARM::BI__builtin_arm_crc32h:
8813 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8814 case clang::ARM::BI__builtin_arm_crc32ch:
8815 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8816 case clang::ARM::BI__builtin_arm_crc32w:
8817 case clang::ARM::BI__builtin_arm_crc32d:
8818 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8819 case clang::ARM::BI__builtin_arm_crc32cw:
8820 case clang::ARM::BI__builtin_arm_crc32cd:
8821 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8822 }
8823
8824 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8825 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8826 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8827
8828 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8829 // intrinsics, hence we need different codegen for these cases.
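    // In other words (lo32/hi32 are just names for this illustration):
    //   __builtin_arm_crc32d(a, b) == crc32w(crc32w(a, lo32(b)), hi32(b))
    // which is exactly what the branch below emits.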
8830 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8831 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8832 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8833 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8834 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8835 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8836
8837 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8838 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8839 return Builder.CreateCall(F, {Res, Arg1b});
8840 } else {
8841 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8842
8843 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8844 return Builder.CreateCall(F, {Arg0, Arg1});
8845 }
8846 }
8847
8848 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8849 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8850 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8851 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8852 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8853 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8854
8855 SpecialRegisterAccessKind AccessKind = Write;
8856 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8857 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8858 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8859 AccessKind = VolatileRead;
8860
8861 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8862 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8863
8864 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8865 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8866
8867 llvm::Type *ValueType;
8868 llvm::Type *RegisterType;
8869 if (IsPointerBuiltin) {
8870 ValueType = VoidPtrTy;
8871 RegisterType = Int32Ty;
8872 } else if (Is64Bit) {
8873 ValueType = RegisterType = Int64Ty;
8874 } else {
8875 ValueType = RegisterType = Int32Ty;
8876 }
8877
8878 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8879 AccessKind);
8880 }
8881
8882 if (BuiltinID == ARM::BI__builtin_sponentry) {
8883 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8884 return Builder.CreateCall(F);
8885 }
8886
8887 // Handle MSVC intrinsics before argument evaluation to prevent double
8888 // evaluation.
8889 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8890 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8891
8892 // Deal with MVE builtins
8893 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8894 return Result;
8895 // Handle CDE builtins
8896 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8897 return Result;
8898
8899  // Some intrinsics are equivalent; if so, use the base intrinsic ID.
8900 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8901 return P.first == BuiltinID;
8902 });
8903 if (It != end(NEONEquivalentIntrinsicMap))
8904 BuiltinID = It->second;
8905
8906 // Find out if any arguments are required to be integer constant
8907 // expressions.
8908 unsigned ICEArguments = 0;
8909  ASTContext::GetBuiltinTypeError Error;
8910 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8911 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8912
8913 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8914 return Builder.getInt32(addr.getAlignment().getQuantity());
8915 };
8916
8917 Address PtrOp0 = Address::invalid();
8918 Address PtrOp1 = Address::invalid();
8919  SmallVector<Value*, 4> Ops;
8920 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8921 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8922 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8923 if (i == 0) {
8924 switch (BuiltinID) {
8925 case NEON::BI__builtin_neon_vld1_v:
8926 case NEON::BI__builtin_neon_vld1q_v:
8927 case NEON::BI__builtin_neon_vld1q_lane_v:
8928 case NEON::BI__builtin_neon_vld1_lane_v:
8929 case NEON::BI__builtin_neon_vld1_dup_v:
8930 case NEON::BI__builtin_neon_vld1q_dup_v:
8931 case NEON::BI__builtin_neon_vst1_v:
8932 case NEON::BI__builtin_neon_vst1q_v:
8933 case NEON::BI__builtin_neon_vst1q_lane_v:
8934 case NEON::BI__builtin_neon_vst1_lane_v:
8935 case NEON::BI__builtin_neon_vst2_v:
8936 case NEON::BI__builtin_neon_vst2q_v:
8937 case NEON::BI__builtin_neon_vst2_lane_v:
8938 case NEON::BI__builtin_neon_vst2q_lane_v:
8939 case NEON::BI__builtin_neon_vst3_v:
8940 case NEON::BI__builtin_neon_vst3q_v:
8941 case NEON::BI__builtin_neon_vst3_lane_v:
8942 case NEON::BI__builtin_neon_vst3q_lane_v:
8943 case NEON::BI__builtin_neon_vst4_v:
8944 case NEON::BI__builtin_neon_vst4q_v:
8945 case NEON::BI__builtin_neon_vst4_lane_v:
8946 case NEON::BI__builtin_neon_vst4q_lane_v:
8947 // Get the alignment for the argument in addition to the value;
8948 // we'll use it later.
8949 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8950 Ops.push_back(PtrOp0.emitRawPointer(*this));
8951 continue;
8952 }
8953 }
8954 if (i == 1) {
8955 switch (BuiltinID) {
8956 case NEON::BI__builtin_neon_vld2_v:
8957 case NEON::BI__builtin_neon_vld2q_v:
8958 case NEON::BI__builtin_neon_vld3_v:
8959 case NEON::BI__builtin_neon_vld3q_v:
8960 case NEON::BI__builtin_neon_vld4_v:
8961 case NEON::BI__builtin_neon_vld4q_v:
8962 case NEON::BI__builtin_neon_vld2_lane_v:
8963 case NEON::BI__builtin_neon_vld2q_lane_v:
8964 case NEON::BI__builtin_neon_vld3_lane_v:
8965 case NEON::BI__builtin_neon_vld3q_lane_v:
8966 case NEON::BI__builtin_neon_vld4_lane_v:
8967 case NEON::BI__builtin_neon_vld4q_lane_v:
8968 case NEON::BI__builtin_neon_vld2_dup_v:
8969 case NEON::BI__builtin_neon_vld2q_dup_v:
8970 case NEON::BI__builtin_neon_vld3_dup_v:
8971 case NEON::BI__builtin_neon_vld3q_dup_v:
8972 case NEON::BI__builtin_neon_vld4_dup_v:
8973 case NEON::BI__builtin_neon_vld4q_dup_v:
8974 // Get the alignment for the argument in addition to the value;
8975 // we'll use it later.
8976 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
8977 Ops.push_back(PtrOp1.emitRawPointer(*this));
8978 continue;
8979 }
8980 }
8981
8982 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
8983 }
8984
8985 switch (BuiltinID) {
8986 default: break;
8987
8988 case NEON::BI__builtin_neon_vget_lane_i8:
8989 case NEON::BI__builtin_neon_vget_lane_i16:
8990 case NEON::BI__builtin_neon_vget_lane_i32:
8991 case NEON::BI__builtin_neon_vget_lane_i64:
8992 case NEON::BI__builtin_neon_vget_lane_bf16:
8993 case NEON::BI__builtin_neon_vget_lane_f32:
8994 case NEON::BI__builtin_neon_vgetq_lane_i8:
8995 case NEON::BI__builtin_neon_vgetq_lane_i16:
8996 case NEON::BI__builtin_neon_vgetq_lane_i32:
8997 case NEON::BI__builtin_neon_vgetq_lane_i64:
8998 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8999 case NEON::BI__builtin_neon_vgetq_lane_f32:
9000 case NEON::BI__builtin_neon_vduph_lane_bf16:
9001 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9002 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9003
9004 case NEON::BI__builtin_neon_vrndns_f32: {
9005 Value *Arg = EmitScalarExpr(E->getArg(0));
9006 llvm::Type *Tys[] = {Arg->getType()};
9007 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9008 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9009
9010 case NEON::BI__builtin_neon_vset_lane_i8:
9011 case NEON::BI__builtin_neon_vset_lane_i16:
9012 case NEON::BI__builtin_neon_vset_lane_i32:
9013 case NEON::BI__builtin_neon_vset_lane_i64:
9014 case NEON::BI__builtin_neon_vset_lane_bf16:
9015 case NEON::BI__builtin_neon_vset_lane_f32:
9016 case NEON::BI__builtin_neon_vsetq_lane_i8:
9017 case NEON::BI__builtin_neon_vsetq_lane_i16:
9018 case NEON::BI__builtin_neon_vsetq_lane_i32:
9019 case NEON::BI__builtin_neon_vsetq_lane_i64:
9020 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9021 case NEON::BI__builtin_neon_vsetq_lane_f32:
9022 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9023
9024 case NEON::BI__builtin_neon_vsha1h_u32:
9025 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9026 "vsha1h");
9027 case NEON::BI__builtin_neon_vsha1cq_u32:
9028 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9029 "vsha1h");
9030 case NEON::BI__builtin_neon_vsha1pq_u32:
9031 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9032 "vsha1h");
9033 case NEON::BI__builtin_neon_vsha1mq_u32:
9034 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9035 "vsha1h");
9036
9037 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9038 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9039 "vcvtbfp2bf");
9040 }
9041
9042 // The ARM _MoveToCoprocessor builtins put the input register value as
9043 // the first argument, but the LLVM intrinsic expects it as the third one.
9044 case clang::ARM::BI_MoveToCoprocessor:
9045 case clang::ARM::BI_MoveToCoprocessor2: {
9046 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9047 ? Intrinsic::arm_mcr
9048 : Intrinsic::arm_mcr2);
9049 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9050 Ops[3], Ops[4], Ops[5]});
9051 }
9052 }
9053
9054 // Get the last argument, which specifies the vector type.
9055 assert(HasExtraArg);
9056 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9057 std::optional<llvm::APSInt> Result =
9058      Arg->getIntegerConstantExpr(getContext());
9059 if (!Result)
9060 return nullptr;
9061
9062 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9063 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9064 // Determine the overloaded type of this builtin.
9065 llvm::Type *Ty;
9066 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9067 Ty = FloatTy;
9068 else
9069 Ty = DoubleTy;
9070
9071 // Determine whether this is an unsigned conversion or not.
9072 bool usgn = Result->getZExtValue() == 1;
9073 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9074
9075 // Call the appropriate intrinsic.
9076 Function *F = CGM.getIntrinsic(Int, Ty);
9077 return Builder.CreateCall(F, Ops, "vcvtr");
9078 }
9079
9080 // Determine the type of this overloaded NEON intrinsic.
9081 NeonTypeFlags Type = Result->getZExtValue();
9082 bool usgn = Type.isUnsigned();
9083 bool rightShift = false;
9084
9085 llvm::FixedVectorType *VTy =
9086 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9087 getTarget().hasBFloat16Type());
9088 llvm::Type *Ty = VTy;
9089 if (!Ty)
9090 return nullptr;
9091
9092 // Many NEON builtins have identical semantics and uses in ARM and
9093 // AArch64. Emit these in a single function.
9094 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9095 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9096 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9097 if (Builtin)
9098    return EmitCommonNeonBuiltinExpr(
9099 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9100 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9101
9102 unsigned Int;
9103 switch (BuiltinID) {
9104 default: return nullptr;
9105 case NEON::BI__builtin_neon_vld1q_lane_v:
9106 // Handle 64-bit integer elements as a special case. Use shuffles of
9107 // one-element vectors to avoid poor code for i64 in the backend.
9108 if (VTy->getElementType()->isIntegerTy(64)) {
9109 // Extract the other lane.
9110 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9111 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9112 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9113 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9114 // Load the value as a one-element vector.
9115 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9116 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9117 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9118 Value *Align = getAlignmentValue32(PtrOp0);
9119 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9120 // Combine them.
9121 int Indices[] = {1 - Lane, Lane};
9122 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9123 }
9124 [[fallthrough]];
9125 case NEON::BI__builtin_neon_vld1_lane_v: {
9126 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9127 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9128 Value *Ld = Builder.CreateLoad(PtrOp0);
9129 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9130 }
9131 case NEON::BI__builtin_neon_vqrshrn_n_v:
9132 Int =
9133 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9134 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9135 1, true);
9136 case NEON::BI__builtin_neon_vqrshrun_n_v:
9137 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9138 Ops, "vqrshrun_n", 1, true);
9139 case NEON::BI__builtin_neon_vqshrn_n_v:
9140 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9141 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9142 1, true);
9143 case NEON::BI__builtin_neon_vqshrun_n_v:
9144 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9145 Ops, "vqshrun_n", 1, true);
9146 case NEON::BI__builtin_neon_vrecpe_v:
9147 case NEON::BI__builtin_neon_vrecpeq_v:
9148 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9149 Ops, "vrecpe");
9150 case NEON::BI__builtin_neon_vrshrn_n_v:
9151 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9152 Ops, "vrshrn_n", 1, true);
9153 case NEON::BI__builtin_neon_vrsra_n_v:
9154 case NEON::BI__builtin_neon_vrsraq_n_v:
9155 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9156 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9157 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9158 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9159 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9160 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9161 case NEON::BI__builtin_neon_vsri_n_v:
9162 case NEON::BI__builtin_neon_vsriq_n_v:
9163 rightShift = true;
9164 [[fallthrough]];
9165 case NEON::BI__builtin_neon_vsli_n_v:
9166 case NEON::BI__builtin_neon_vsliq_n_v:
9167 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9168 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9169 Ops, "vsli_n");
9170 case NEON::BI__builtin_neon_vsra_n_v:
9171 case NEON::BI__builtin_neon_vsraq_n_v:
9172 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9173 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9174 return Builder.CreateAdd(Ops[0], Ops[1]);
9175 case NEON::BI__builtin_neon_vst1q_lane_v:
9176 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9177 // a one-element vector and avoid poor code for i64 in the backend.
9178 if (VTy->getElementType()->isIntegerTy(64)) {
9179 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9180 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9181 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9182 Ops[2] = getAlignmentValue32(PtrOp0);
9183 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9184 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9185 Tys), Ops);
9186 }
9187 [[fallthrough]];
9188 case NEON::BI__builtin_neon_vst1_lane_v: {
9189 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9190 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9191 return Builder.CreateStore(Ops[1],
9192 PtrOp0.withElementType(Ops[1]->getType()));
9193 }
9194 case NEON::BI__builtin_neon_vtbl1_v:
9195 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9196 Ops, "vtbl1");
9197 case NEON::BI__builtin_neon_vtbl2_v:
9198 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9199 Ops, "vtbl2");
9200 case NEON::BI__builtin_neon_vtbl3_v:
9201 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9202 Ops, "vtbl3");
9203 case NEON::BI__builtin_neon_vtbl4_v:
9204 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9205 Ops, "vtbl4");
9206 case NEON::BI__builtin_neon_vtbx1_v:
9207 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9208 Ops, "vtbx1");
9209 case NEON::BI__builtin_neon_vtbx2_v:
9210 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9211 Ops, "vtbx2");
9212 case NEON::BI__builtin_neon_vtbx3_v:
9213 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9214 Ops, "vtbx3");
9215 case NEON::BI__builtin_neon_vtbx4_v:
9216 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9217 Ops, "vtbx4");
9218 }
9219}
9220
9221template<typename Integer>
9222static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9223 return E->getIntegerConstantExpr(Context)->getExtValue();
9224}
9225
9226static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9227 llvm::Type *T, bool Unsigned) {
9228 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9229 // which finds it convenient to specify signed/unsigned as a boolean flag.
9230 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9231}
9232
9233static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9234 uint32_t Shift, bool Unsigned) {
9235 // MVE helper function for integer shift right. This must handle signed vs
9236 // unsigned, and also deal specially with the case where the shift count is
9237  // equal to the lane size. In LLVM IR, a shift by the full lane width
9238  // produces a poison value, but in MVE it's well defined, so we must
9239  // convert it to code whose result is not poison in IR.
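  // For example, with 32-bit lanes a requested unsigned shift by 32 becomes a
  // zero vector, while a signed shift by 32 is emitted as an arithmetic shift
  // by 31, which fills every lane with its sign bit.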
9240 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9241 ->getElementType()
9242 ->getPrimitiveSizeInBits();
9243 if (Shift == LaneBits) {
9244 // An unsigned shift of the full lane size always generates zero, so we can
9245 // simply emit a zero vector. A signed shift of the full lane size does the
9246 // same thing as shifting by one bit fewer.
9247 if (Unsigned)
9248 return llvm::Constant::getNullValue(V->getType());
9249 else
9250 --Shift;
9251 }
9252 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9253}
9254
9255static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9256 // MVE-specific helper function for a vector splat, which infers the element
9257 // count of the output vector by knowing that MVE vectors are all 128 bits
9258 // wide.
9259 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9260 return Builder.CreateVectorSplat(Elements, V);
9261}
9262
9263static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9264 CodeGenFunction *CGF,
9265 llvm::Value *V,
9266 llvm::Type *DestType) {
9267 // Convert one MVE vector type into another by reinterpreting its in-register
9268 // format.
9269 //
9270  // In little-endian mode, this is identical to a bitcast (which reinterprets
9271  // the memory format). In big-endian mode, the two are not necessarily the
9272  // same, because the register and memory formats map to each other
9273  // differently depending on the lane size.
9274 //
9275 // We generate a bitcast whenever we can (if we're little-endian, or if the
9276 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9277 // that performs the different kind of reinterpretation.
9278 if (CGF->getTarget().isBigEndian() &&
9279 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9280 return Builder.CreateCall(
9281 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9282 {DestType, V->getType()}),
9283 V);
9284 } else {
9285 return Builder.CreateBitCast(V, DestType);
9286 }
9287}
9288
9289static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9290 // Make a shufflevector that extracts every other element of a vector (evens
9291 // or odds, as desired).
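  // For example, with an 8-element input, Odd == false selects lanes
  // {0, 2, 4, 6} and Odd == true selects lanes {1, 3, 5, 7}.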
9292 SmallVector<int, 16> Indices;
9293 unsigned InputElements =
9294 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9295 for (unsigned i = 0; i < InputElements; i += 2)
9296 Indices.push_back(i + Odd);
9297 return Builder.CreateShuffleVector(V, Indices);
9298}
9299
9300static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9301 llvm::Value *V1) {
9302 // Make a shufflevector that interleaves two vectors element by element.
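  // For example, zipping two 4-element vectors uses the shuffle mask
  // {0, 4, 1, 5, 2, 6, 3, 7}.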
9303 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9304 SmallVector<int, 16> Indices;
9305 unsigned InputElements =
9306 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9307 for (unsigned i = 0; i < InputElements; i++) {
9308 Indices.push_back(i);
9309 Indices.push_back(i + InputElements);
9310 }
9311 return Builder.CreateShuffleVector(V0, V1, Indices);
9312}
9313
9314template<unsigned HighBit, unsigned OtherBits>
9315static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9316 // MVE-specific helper function to make a vector splat of a constant such as
9317 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
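  // For example, with 16-bit lanes: <HighBit=1, OtherBits=0> splats 0x8000
  // (INT16_MIN), <HighBit=0, OtherBits=1> splats 0x7fff (INT16_MAX), and
  // <HighBit=1, OtherBits=1> splats 0xffff (UINT16_MAX).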
9318 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9319 unsigned LaneBits = T->getPrimitiveSizeInBits();
9320 uint32_t Value = HighBit << (LaneBits - 1);
9321 if (OtherBits)
9322 Value |= (1UL << (LaneBits - 1)) - 1;
9323 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9324 return ARMMVEVectorSplat(Builder, Lane);
9325}
9326
9327static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9328 llvm::Value *V,
9329 unsigned ReverseWidth) {
9330 // MVE-specific helper function which reverses the elements of a
9331 // vector within every (ReverseWidth)-bit collection of lanes.
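  // For example, reversing 8-bit lanes within each 32-bit group
  // (ReverseWidth == 32) uses Mask == 3, giving the index pattern
  // {3, 2, 1, 0, 7, 6, 5, 4, ...}.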
9332 SmallVector<int, 16> Indices;
9333 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9334 unsigned Elements = 128 / LaneSize;
9335 unsigned Mask = ReverseWidth / LaneSize - 1;
9336 for (unsigned i = 0; i < Elements; i++)
9337 Indices.push_back(i ^ Mask);
9338 return Builder.CreateShuffleVector(V, Indices);
9339}
9340
9341Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9342 const CallExpr *E,
9343 ReturnValueSlot ReturnValue,
9344 llvm::Triple::ArchType Arch) {
9345 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9346 Intrinsic::ID IRIntr;
9347 unsigned NumVectors;
9348
9349 // Code autogenerated by Tablegen will handle all the simple builtins.
9350 switch (BuiltinID) {
9351 #include "clang/Basic/arm_mve_builtin_cg.inc"
9352
9353 // If we didn't match an MVE builtin id at all, go back to the
9354 // main EmitARMBuiltinExpr.
9355 default:
9356 return nullptr;
9357 }
9358
9359 // Anything that breaks from that switch is an MVE builtin that
9360 // needs handwritten code to generate.
9361
9362 switch (CustomCodeGenType) {
9363
9364 case CustomCodeGen::VLD24: {
9365    llvm::SmallVector<Value *, 4> Ops;
9366    llvm::SmallVector<llvm::Type *, 4> Tys;
9367
9368 auto MvecCType = E->getType();
9369 auto MvecLType = ConvertType(MvecCType);
9370 assert(MvecLType->isStructTy() &&
9371 "Return type for vld[24]q should be a struct");
9372 assert(MvecLType->getStructNumElements() == 1 &&
9373 "Return-type struct for vld[24]q should have one element");
9374 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9375 assert(MvecLTypeInner->isArrayTy() &&
9376 "Return-type struct for vld[24]q should contain an array");
9377 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9378 "Array member of return-type struct vld[24]q has wrong length");
9379 auto VecLType = MvecLTypeInner->getArrayElementType();
9380
9381 Tys.push_back(VecLType);
9382
9383 auto Addr = E->getArg(0);
9384 Ops.push_back(EmitScalarExpr(Addr));
9385 Tys.push_back(ConvertType(Addr->getType()));
9386
9387 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9388 Value *LoadResult = Builder.CreateCall(F, Ops);
9389 Value *MvecOut = PoisonValue::get(MvecLType);
9390 for (unsigned i = 0; i < NumVectors; ++i) {
9391 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9392 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9393 }
9394
9395 if (ReturnValue.isNull())
9396 return MvecOut;
9397 else
9398 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9399 }
9400
9401 case CustomCodeGen::VST24: {
9402    llvm::SmallVector<Value *, 4> Ops;
9403    llvm::SmallVector<llvm::Type *, 4> Tys;
9404
9405 auto Addr = E->getArg(0);
9406 Ops.push_back(EmitScalarExpr(Addr));
9407 Tys.push_back(ConvertType(Addr->getType()));
9408
9409 auto MvecCType = E->getArg(1)->getType();
9410 auto MvecLType = ConvertType(MvecCType);
9411 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9412 assert(MvecLType->getStructNumElements() == 1 &&
9413 "Data-type struct for vst2q should have one element");
9414 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9415 assert(MvecLTypeInner->isArrayTy() &&
9416 "Data-type struct for vst2q should contain an array");
9417 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9418 "Array member of return-type struct vld[24]q has wrong length");
9419 auto VecLType = MvecLTypeInner->getArrayElementType();
9420
9421 Tys.push_back(VecLType);
9422
9423 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9424 EmitAggExpr(E->getArg(1), MvecSlot);
9425 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9426 for (unsigned i = 0; i < NumVectors; i++)
9427 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9428
9429 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9430 Value *ToReturn = nullptr;
9431 for (unsigned i = 0; i < NumVectors; i++) {
9432 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9433 ToReturn = Builder.CreateCall(F, Ops);
9434 Ops.pop_back();
9435 }
9436 return ToReturn;
9437 }
9438 }
9439 llvm_unreachable("unknown custom codegen type.");
9440}
9441
9442Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9443 const CallExpr *E,
9444 ReturnValueSlot ReturnValue,
9445 llvm::Triple::ArchType Arch) {
9446 switch (BuiltinID) {
9447 default:
9448 return nullptr;
9449#include "clang/Basic/arm_cde_builtin_cg.inc"
9450 }
9451}
9452
9453static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9454 const CallExpr *E,
9455                                         SmallVectorImpl<Value *> &Ops,
9456 llvm::Triple::ArchType Arch) {
9457 unsigned int Int = 0;
9458 const char *s = nullptr;
9459
9460 switch (BuiltinID) {
9461 default:
9462 return nullptr;
9463 case NEON::BI__builtin_neon_vtbl1_v:
9464 case NEON::BI__builtin_neon_vqtbl1_v:
9465 case NEON::BI__builtin_neon_vqtbl1q_v:
9466 case NEON::BI__builtin_neon_vtbl2_v:
9467 case NEON::BI__builtin_neon_vqtbl2_v:
9468 case NEON::BI__builtin_neon_vqtbl2q_v:
9469 case NEON::BI__builtin_neon_vtbl3_v:
9470 case NEON::BI__builtin_neon_vqtbl3_v:
9471 case NEON::BI__builtin_neon_vqtbl3q_v:
9472 case NEON::BI__builtin_neon_vtbl4_v:
9473 case NEON::BI__builtin_neon_vqtbl4_v:
9474 case NEON::BI__builtin_neon_vqtbl4q_v:
9475 break;
9476 case NEON::BI__builtin_neon_vtbx1_v:
9477 case NEON::BI__builtin_neon_vqtbx1_v:
9478 case NEON::BI__builtin_neon_vqtbx1q_v:
9479 case NEON::BI__builtin_neon_vtbx2_v:
9480 case NEON::BI__builtin_neon_vqtbx2_v:
9481 case NEON::BI__builtin_neon_vqtbx2q_v:
9482 case NEON::BI__builtin_neon_vtbx3_v:
9483 case NEON::BI__builtin_neon_vqtbx3_v:
9484 case NEON::BI__builtin_neon_vqtbx3q_v:
9485 case NEON::BI__builtin_neon_vtbx4_v:
9486 case NEON::BI__builtin_neon_vqtbx4_v:
9487 case NEON::BI__builtin_neon_vqtbx4q_v:
9488 break;
9489 }
9490
9491 assert(E->getNumArgs() >= 3);
9492
9493 // Get the last argument, which specifies the vector type.
9494 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9495 std::optional<llvm::APSInt> Result =
9496      Arg->getIntegerConstantExpr(CGF.getContext());
9497 if (!Result)
9498 return nullptr;
9499
9500 // Determine the type of this overloaded NEON intrinsic.
9501 NeonTypeFlags Type = Result->getZExtValue();
9502 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9503 if (!Ty)
9504 return nullptr;
9505
9506 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9507
9508  // AArch64 scalar builtins are not overloaded; they do not have an extra
9509  // argument that specifies the vector type, so we need to handle each case.
9510 switch (BuiltinID) {
9511 case NEON::BI__builtin_neon_vtbl1_v: {
9512 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9513 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9514 }
9515 case NEON::BI__builtin_neon_vtbl2_v: {
9516 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9517 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9518 }
9519 case NEON::BI__builtin_neon_vtbl3_v: {
9520 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9521 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9522 }
9523 case NEON::BI__builtin_neon_vtbl4_v: {
9524 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9525 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9526 }
9527 case NEON::BI__builtin_neon_vtbx1_v: {
9528 Value *TblRes =
9529 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9530 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9531
9532 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9533 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9534 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9535
9536 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9537 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9538 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9539 }
9540 case NEON::BI__builtin_neon_vtbx2_v: {
9541 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9542 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9543 }
9544 case NEON::BI__builtin_neon_vtbx3_v: {
9545 Value *TblRes =
9546 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9547 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9548
9549 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9550 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9551 TwentyFourV);
9552 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9553
9554 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9555 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9556 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9557 }
9558 case NEON::BI__builtin_neon_vtbx4_v: {
9559 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9560 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9561 }
9562 case NEON::BI__builtin_neon_vqtbl1_v:
9563 case NEON::BI__builtin_neon_vqtbl1q_v:
9564 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9565 case NEON::BI__builtin_neon_vqtbl2_v:
9566  case NEON::BI__builtin_neon_vqtbl2q_v:
9567 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9568 case NEON::BI__builtin_neon_vqtbl3_v:
9569 case NEON::BI__builtin_neon_vqtbl3q_v:
9570 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9571 case NEON::BI__builtin_neon_vqtbl4_v:
9572 case NEON::BI__builtin_neon_vqtbl4q_v:
9573 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9574 case NEON::BI__builtin_neon_vqtbx1_v:
9575 case NEON::BI__builtin_neon_vqtbx1q_v:
9576 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9577 case NEON::BI__builtin_neon_vqtbx2_v:
9578 case NEON::BI__builtin_neon_vqtbx2q_v:
9579 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9580 case NEON::BI__builtin_neon_vqtbx3_v:
9581 case NEON::BI__builtin_neon_vqtbx3q_v:
9582 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9583 case NEON::BI__builtin_neon_vqtbx4_v:
9584 case NEON::BI__builtin_neon_vqtbx4q_v:
9585 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9587  }
9588
9589 if (!Int)
9590 return nullptr;
9591
9592 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9593 return CGF.EmitNeonCall(F, Ops, s);
9594}
9595
9596Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9597 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9598 Op = Builder.CreateBitCast(Op, Int16Ty);
9599 Value *V = PoisonValue::get(VTy);
9600 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9601 Op = Builder.CreateInsertElement(V, Op, CI);
9602 return Op;
9603}
9604
9605/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9606/// access builtin. Only required if it can't be inferred from the base pointer
9607/// operand.
9608llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9609 switch (TypeFlags.getMemEltType()) {
9610 case SVETypeFlags::MemEltTyDefault:
9611 return getEltType(TypeFlags);
9612 case SVETypeFlags::MemEltTyInt8:
9613 return Builder.getInt8Ty();
9614 case SVETypeFlags::MemEltTyInt16:
9615 return Builder.getInt16Ty();
9616 case SVETypeFlags::MemEltTyInt32:
9617 return Builder.getInt32Ty();
9618 case SVETypeFlags::MemEltTyInt64:
9619 return Builder.getInt64Ty();
9620 }
9621 llvm_unreachable("Unknown MemEltType");
9622}
9623
9624llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9625 switch (TypeFlags.getEltType()) {
9626 default:
9627 llvm_unreachable("Invalid SVETypeFlag!");
9628
9629 case SVETypeFlags::EltTyInt8:
9630 return Builder.getInt8Ty();
9631 case SVETypeFlags::EltTyInt16:
9632 return Builder.getInt16Ty();
9633 case SVETypeFlags::EltTyInt32:
9634 return Builder.getInt32Ty();
9635 case SVETypeFlags::EltTyInt64:
9636 return Builder.getInt64Ty();
9637 case SVETypeFlags::EltTyInt128:
9638 return Builder.getInt128Ty();
9639
9640 case SVETypeFlags::EltTyFloat16:
9641 return Builder.getHalfTy();
9642 case SVETypeFlags::EltTyFloat32:
9643 return Builder.getFloatTy();
9644 case SVETypeFlags::EltTyFloat64:
9645 return Builder.getDoubleTy();
9646
9647 case SVETypeFlags::EltTyBFloat16:
9648 return Builder.getBFloatTy();
9649
9650 case SVETypeFlags::EltTyBool8:
9651 case SVETypeFlags::EltTyBool16:
9652 case SVETypeFlags::EltTyBool32:
9653 case SVETypeFlags::EltTyBool64:
9654 return Builder.getInt1Ty();
9655 }
9656}
9657
9658// Return the llvm predicate vector type corresponding to the specified element
9659// TypeFlags.
9660llvm::ScalableVectorType *
9662 switch (TypeFlags.getEltType()) {
9663 default: llvm_unreachable("Unhandled SVETypeFlag!");
9664
9665 case SVETypeFlags::EltTyInt8:
9666 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9667 case SVETypeFlags::EltTyInt16:
9668 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9669 case SVETypeFlags::EltTyInt32:
9670 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9671 case SVETypeFlags::EltTyInt64:
9672 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9673
9674 case SVETypeFlags::EltTyBFloat16:
9675 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9676 case SVETypeFlags::EltTyFloat16:
9677 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9678 case SVETypeFlags::EltTyFloat32:
9679 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9680 case SVETypeFlags::EltTyFloat64:
9681 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9682
9683 case SVETypeFlags::EltTyBool8:
9684 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9685 case SVETypeFlags::EltTyBool16:
9686 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9687 case SVETypeFlags::EltTyBool32:
9688 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9689 case SVETypeFlags::EltTyBool64:
9690 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9691 }
9692}
9693
9694// Return the llvm vector type corresponding to the specified element TypeFlags.
9695llvm::ScalableVectorType *
9696CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9697 switch (TypeFlags.getEltType()) {
9698 default:
9699 llvm_unreachable("Invalid SVETypeFlag!");
9700
9701 case SVETypeFlags::EltTyInt8:
9702 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9703 case SVETypeFlags::EltTyInt16:
9704 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9705 case SVETypeFlags::EltTyInt32:
9706 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9707 case SVETypeFlags::EltTyInt64:
9708 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9709
9710 case SVETypeFlags::EltTyFloat16:
9711 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9712 case SVETypeFlags::EltTyBFloat16:
9713 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9714 case SVETypeFlags::EltTyFloat32:
9715 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9716 case SVETypeFlags::EltTyFloat64:
9717 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9718
9719 case SVETypeFlags::EltTyBool8:
9720 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9721 case SVETypeFlags::EltTyBool16:
9722 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9723 case SVETypeFlags::EltTyBool32:
9724 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9725 case SVETypeFlags::EltTyBool64:
9726 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9727 }
9728}
9729
9730llvm::Value *
9731CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9732 Function *Ptrue =
9733 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9734 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9735}
9736
9737constexpr unsigned SVEBitsPerBlock = 128;
9738
9739static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9740 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9741 return llvm::ScalableVectorType::get(EltTy, NumElts);
9742}
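// For example (illustrative, not from the original source): with
// SVEBitsPerBlock = 128, an i32 element type yields 128/32 = 4 elements,
// i.e. <vscale x 4 x i32>, and an i8 element type yields <vscale x 16 x i8>.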
9743
9744// Reinterpret the input predicate so that it can be used to correctly isolate
9745// the elements of the specified datatype.
9746Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9747 llvm::ScalableVectorType *VTy) {
9748
9749 if (isa<TargetExtType>(Pred->getType()) &&
9750 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9751 return Pred;
9752
9753 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9754 if (Pred->getType() == RTy)
9755 return Pred;
9756
9757 unsigned IntID;
9758 llvm::Type *IntrinsicTy;
9759 switch (VTy->getMinNumElements()) {
9760 default:
9761 llvm_unreachable("unsupported element count!");
9762 case 1:
9763 case 2:
9764 case 4:
9765 case 8:
9766 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9767 IntrinsicTy = RTy;
9768 break;
9769 case 16:
9770 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9771 IntrinsicTy = Pred->getType();
9772 break;
9773 }
9774
9775 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9776 Value *C = Builder.CreateCall(F, Pred);
9777 assert(C->getType() == RTy && "Unexpected return type!");
9778 return C;
9779}
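// Illustrative example (not from the original source): casting an svbool_t
// predicate, <vscale x 16 x i1>, for use with 64-bit data emits
//   %p = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(
//            <vscale x 16 x i1> %pg)
// whereas a predicate that already has the requested element count is
// returned unchanged.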
9780
9781Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9782 SmallVectorImpl<Value *> &Ops,
9783 unsigned IntID) {
9784 auto *ResultTy = getSVEType(TypeFlags);
9785 auto *OverloadedTy =
9786 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9787
9788 Function *F = nullptr;
9789 if (Ops[1]->getType()->isVectorTy())
9790 // This is the "vector base, scalar offset" case. In order to uniquely
9791 // map this built-in to an LLVM IR intrinsic, we need both the return type
9792 // and the type of the vector base.
9793 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9794 else
9795 // This is the "scalar base, vector offset" case. The type of the offset
9796 // is encoded in the name of the intrinsic. We only need to specify the
9797 // return type in order to uniquely map this built-in to an LLVM IR
9798 // intrinsic.
9799 F = CGM.getIntrinsic(IntID, OverloadedTy);
9800
9801 // At the ACLE level there's only one predicate type, svbool_t, which is
9802 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9803 // actual type being loaded. For example, when loading doubles (i64) the
9804 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9805 // the predicate and the data being loaded must match. Cast to the type
9806 // expected by the intrinsic. The intrinsic itself should be defined in
9807 // a way that enforces relations between parameter types.
9808 Ops[0] = EmitSVEPredicateCast(
9809 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9810
9811 // Pass 0 when the offset is missing. This can only be applied when using
9812 // the "vector base" addressing mode for which ACLE allows no offset. The
9813 // corresponding LLVM IR always requires an offset.
9814 if (Ops.size() == 2) {
9815 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9816 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9817 }
9818
9819 // For "vector base, scalar index" scale the index so that it becomes a
9820 // scalar offset.
9821 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9822 unsigned BytesPerElt =
9823 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9824 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9825 }
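 // For example (illustrative): a gather of 64-bit elements has
 // BytesPerElt = 8, so the index is shifted left by 3 and index 5 becomes
 // byte offset 40.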
9826
9827 Value *Call = Builder.CreateCall(F, Ops);
9828
9829 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9830 // other cases it's folded into a nop.
9831 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9832 : Builder.CreateSExt(Call, ResultTy);
9833}
9834
9835Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9836 SmallVectorImpl<Value *> &Ops,
9837 unsigned IntID) {
9838 auto *SrcDataTy = getSVEType(TypeFlags);
9839 auto *OverloadedTy =
9840 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9841
9842 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9843 // it's the first argument. Move it accordingly.
9844 Ops.insert(Ops.begin(), Ops.pop_back_val());
9845
9846 Function *F = nullptr;
9847 if (Ops[2]->getType()->isVectorTy())
9848 // This is the "vector base, scalar offset" case. In order to uniquely
9849 // map this built-in to an LLVM IR intrinsic, we need both the return type
9850 // and the type of the vector base.
9851 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9852 else
9853 // This is the "scalar base, vector offset" case. The type of the offset
9854 // is encoded in the name of the intrinsic. We only need to specify the
9855 // return type in order to uniquely map this built-in to an LLVM IR
9856 // intrinsic.
9857 F = CGM.getIntrinsic(IntID, OverloadedTy);
9858
9859 // Pass 0 when the offset is missing. This can only be applied when using
9860 // the "vector base" addressing mode for which ACLE allows no offset. The
9861 // corresponding LLVM IR always requires an offset.
9862 if (Ops.size() == 3) {
9863 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9864 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9865 }
9866
9867 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9868 // folded into a nop.
9869 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9870
9871 // At the ACLE level there's only one predicate type, svbool_t, which is
9872 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9873 // actual type being stored. For example, when storing doubles (i64) the
9874 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9875 // the predicate and the data being stored must match. Cast to the type
9876 // expected by the intrinsic. The intrinsic itself should be defined in
9877 // a way that enforces relations between parameter types.
9878 Ops[1] = EmitSVEPredicateCast(
9879 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9880
9881 // For "vector base, scalar index" scale the index so that it becomes a
9882 // scalar offset.
9883 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9884 unsigned BytesPerElt =
9885 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9886 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9887 }
9888
9889 return Builder.CreateCall(F, Ops);
9890}
9891
9892Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9893 SmallVectorImpl<Value *> &Ops,
9894 unsigned IntID) {
9895 // The gather prefetches are overloaded on the vector input - this can either
9896 // be the vector of base addresses or vector of offsets.
9897 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9898 if (!OverloadedTy)
9899 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9900
9901 // Cast the predicate from svbool_t to the right number of elements.
9902 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9903
9904 // vector + imm addressing modes
9905 if (Ops[1]->getType()->isVectorTy()) {
9906 if (Ops.size() == 3) {
9907 // Pass 0 for 'vector+imm' when the index is omitted.
9908 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9909
9910 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9911 std::swap(Ops[2], Ops[3]);
9912 } else {
9913 // Index needs to be passed as scaled offset.
9914 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9915 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9916 if (BytesPerElt > 1)
9917 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9918 }
9919 }
9920
9921 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9922 return Builder.CreateCall(F, Ops);
9923}
9924
9925Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9926 SmallVectorImpl<Value *> &Ops,
9927 unsigned IntID) {
9928 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9929
9930 unsigned N;
9931 switch (IntID) {
9932 case Intrinsic::aarch64_sve_ld2_sret:
9933 case Intrinsic::aarch64_sve_ld1_pn_x2:
9934 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9935 case Intrinsic::aarch64_sve_ld2q_sret:
9936 N = 2;
9937 break;
9938 case Intrinsic::aarch64_sve_ld3_sret:
9939 case Intrinsic::aarch64_sve_ld3q_sret:
9940 N = 3;
9941 break;
9942 case Intrinsic::aarch64_sve_ld4_sret:
9943 case Intrinsic::aarch64_sve_ld1_pn_x4:
9944 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9945 case Intrinsic::aarch64_sve_ld4q_sret:
9946 N = 4;
9947 break;
9948 default:
9949 llvm_unreachable("unknown intrinsic!");
9950 }
9951 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
9952 VTy->getElementCount() * N);
9953
9954 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9955 Value *BasePtr = Ops[1];
9956
9957 // Does the load have an offset?
9958 if (Ops.size() > 2)
9959 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9960
9961 Function *F = CGM.getIntrinsic(IntID, {VTy});
9962 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
9963 unsigned MinElts = VTy->getMinNumElements();
9964 Value *Ret = llvm::PoisonValue::get(RetTy);
9965 for (unsigned I = 0; I < N; I++) {
9966 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9967 Value *SRet = Builder.CreateExtractValue(Call, I);
9968 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
9969 }
9970 return Ret;
9971}
9972
9973Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9974 SmallVectorImpl<Value *> &Ops,
9975 unsigned IntID) {
9976 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9977
9978 unsigned N;
9979 switch (IntID) {
9980 case Intrinsic::aarch64_sve_st2:
9981 case Intrinsic::aarch64_sve_st1_pn_x2:
9982 case Intrinsic::aarch64_sve_stnt1_pn_x2:
9983 case Intrinsic::aarch64_sve_st2q:
9984 N = 2;
9985 break;
9986 case Intrinsic::aarch64_sve_st3:
9987 case Intrinsic::aarch64_sve_st3q:
9988 N = 3;
9989 break;
9990 case Intrinsic::aarch64_sve_st4:
9991 case Intrinsic::aarch64_sve_st1_pn_x4:
9992 case Intrinsic::aarch64_sve_stnt1_pn_x4:
9993 case Intrinsic::aarch64_sve_st4q:
9994 N = 4;
9995 break;
9996 default:
9997 llvm_unreachable("unknown intrinsic!");
9998 }
9999
10000 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10001 Value *BasePtr = Ops[1];
10002
10003 // Does the store have an offset?
10004 if (Ops.size() > (2 + N))
10005 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10006
10007 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10008 // need to break up the tuple vector.
10009 SmallVector<llvm::Value *, 5> Operands;
10010 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10011 Operands.push_back(Ops[I]);
10012 Operands.append({Predicate, BasePtr});
10013 Function *F = CGM.getIntrinsic(IntID, { VTy });
10014
10015 return Builder.CreateCall(F, Operands);
10016}
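// Illustrative sketch (assumed, not from the original source): an svst2_f32
// store whose tuple has already been split by GetAArch64SVEProcessedOperands
// into two <vscale x 4 x float> parts ends up as
//   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
//       <vscale x 4 x float> %v1, <vscale x 4 x i1> %pg, ptr %base)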
10017
10018// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10019// svpmullt_pair intrinsics, with the exception that their results are bitcast
10020// to a wider type.
10021Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10022 SmallVectorImpl<Value *> &Ops,
10023 unsigned BuiltinID) {
10024 // Splat scalar operand to vector (intrinsics with _n infix)
10025 if (TypeFlags.hasSplatOperand()) {
10026 unsigned OpNo = TypeFlags.getSplatOperand();
10027 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10028 }
10029
10030 // The pair-wise function has a narrower overloaded type.
10031 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10032 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10033
10034 // Now bitcast to the wider result type.
10035 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10036 return EmitSVEReinterpret(Call, Ty);
10037}
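// For example (illustrative): svpmullb_u64 operates on two svuint32_t values;
// the pair intrinsic is emitted on <vscale x 4 x i32> and its result is then
// reinterpreted as the wider <vscale x 2 x i64> return type.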
10038
10039Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10040 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10041 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10042 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10043 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10044}
10045
10046Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10047 SmallVectorImpl<Value *> &Ops,
10048 unsigned BuiltinID) {
10049 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10050 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10051 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10052
10053 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10054 Value *BasePtr = Ops[1];
10055
10056 // Apply the index operand if it is not omitted.
10057 if (Ops.size() > 3)
10058 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10059
10060 Value *PrfOp = Ops.back();
10061
10062 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10063 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10064}
10065
10066Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10067 llvm::Type *ReturnTy,
10068 SmallVectorImpl<Value *> &Ops,
10069 unsigned IntrinsicID,
10070 bool IsZExtReturn) {
10071 QualType LangPTy = E->getArg(1)->getType();
10072 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10073 LangPTy->castAs<PointerType>()->getPointeeType());
10074
10075 // The vector type that is returned may be different from the
10076 // eventual type loaded from memory.
10077 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10078 llvm::ScalableVectorType *MemoryTy = nullptr;
10079 llvm::ScalableVectorType *PredTy = nullptr;
10080 bool IsQuadLoad = false;
10081 switch (IntrinsicID) {
10082 case Intrinsic::aarch64_sve_ld1uwq:
10083 case Intrinsic::aarch64_sve_ld1udq:
10084 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10085 PredTy = llvm::ScalableVectorType::get(
10086 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10087 IsQuadLoad = true;
10088 break;
10089 default:
10090 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10091 PredTy = MemoryTy;
10092 break;
10093 }
10094
10095 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10096 Value *BasePtr = Ops[1];
10097
10098 // Does the load have an offset?
10099 if (Ops.size() > 2)
10100 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10101
10102 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10103 auto *Load =
10104 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10105 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10106 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10107
10108 if (IsQuadLoad)
10109 return Load;
10110
10111 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10112 : Builder.CreateSExt(Load, VectorTy);
10113}
10114
10115Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10116 SmallVectorImpl<Value *> &Ops,
10117 unsigned IntrinsicID) {
10118 QualType LangPTy = E->getArg(1)->getType();
10119 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10120 LangPTy->castAs<PointerType>()->getPointeeType());
10121
10122 // The vector type that is stored may be different from the
10123 // eventual type stored to memory.
10124 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10125 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10126
10127 auto PredTy = MemoryTy;
10128 auto AddrMemoryTy = MemoryTy;
10129 bool IsQuadStore = false;
10130
10131 switch (IntrinsicID) {
10132 case Intrinsic::aarch64_sve_st1wq:
10133 case Intrinsic::aarch64_sve_st1dq:
10134 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10135 PredTy =
10136 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10137 IsQuadStore = true;
10138 break;
10139 default:
10140 break;
10141 }
10142 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10143 Value *BasePtr = Ops[1];
10144
10145 // Does the store have an offset?
10146 if (Ops.size() == 4)
10147 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10148
10149 // Last value is always the data
10150 Value *Val =
10151 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10152
10153 Function *F =
10154 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10155 auto *Store =
10156 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10157 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10158 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10159 return Store;
10160}
10161
10162Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10163 SmallVectorImpl<Value *> &Ops,
10164 unsigned IntID) {
10165 Ops[2] = EmitSVEPredicateCast(
10166 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10167
10168 SmallVector<Value *> NewOps;
10169 NewOps.push_back(Ops[2]);
10170
10171 llvm::Value *BasePtr = Ops[3];
10172
10173 // If the intrinsic contains the vnum parameter, multiply it with the vector
10174 // size in bytes.
10175 if (Ops.size() == 5) {
10176 Function *StreamingVectorLength =
10177 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10178 llvm::Value *StreamingVectorLengthCall =
10179 Builder.CreateCall(StreamingVectorLength);
10180 llvm::Value *Mulvl =
10181 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10182 // The type of the ptr parameter is void *, so use Int8Ty here.
10183 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10184 }
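 // Illustrative example (not from the original source): with vnum = 2 the
 // effective base becomes BasePtr + 2 * svcntsb() bytes, where svcntsb() is
 // the streaming vector length in bytes returned by aarch64.sme.cntsb.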
10185 NewOps.push_back(BasePtr);
10186 NewOps.push_back(Ops[0]);
10187 NewOps.push_back(Ops[1]);
10188 Function *F = CGM.getIntrinsic(IntID);
10189 return Builder.CreateCall(F, NewOps);
10190}
10191
10192Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10193 SmallVectorImpl<Value *> &Ops,
10194 unsigned IntID) {
10195 auto *VecTy = getSVEType(TypeFlags);
10196 Function *F = CGM.getIntrinsic(IntID, VecTy);
10197 if (TypeFlags.isReadZA())
10198 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10199 else if (TypeFlags.isWriteZA())
10200 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10201 return Builder.CreateCall(F, Ops);
10202}
10203
10204Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10205 SmallVectorImpl<Value *> &Ops,
10206 unsigned IntID) {
10207 // The svzero_za() intrinsic zeros the entire ZA array and takes no parameters.
10208 if (Ops.size() == 0)
10209 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10210 Function *F = CGM.getIntrinsic(IntID, {});
10211 return Builder.CreateCall(F, Ops);
10212}
10213
10214Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10215 SmallVectorImpl<Value *> &Ops,
10216 unsigned IntID) {
10217 if (Ops.size() == 2)
10218 Ops.push_back(Builder.getInt32(0));
10219 else
10220 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10221 Function *F = CGM.getIntrinsic(IntID, {});
10222 return Builder.CreateCall(F, Ops);
10223}
10224
10225// Limit the usage of scalable llvm IR generated by the ACLE by using the
10226// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10227Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10228 return Builder.CreateVectorSplat(
10229 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10230}
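// e.g. (illustrative) EmitSVEDupX(Scalar, <vscale x 4 x i32>) splats an i32
// scalar across every lane of a <vscale x 4 x i32> vector.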
10231
10232Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10233 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10234}
10235
10236Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10237 // FIXME: For big endian this needs an additional REV, or needs a separate
10238 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10239 // instruction is defined as 'bitwise' equivalent from memory point of
10240 // view (when storing/reloading), whereas the svreinterpret builtin
10241 // implements bitwise equivalent cast from register point of view.
10242 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10243 return Builder.CreateBitCast(Val, Ty);
10244}
10245
10246static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10247 SmallVectorImpl<Value *> &Ops) {
10248 auto *SplatZero = Constant::getNullValue(Ty);
10249 Ops.insert(Ops.begin(), SplatZero);
10250}
10251
10252static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10253 SmallVectorImpl<Value *> &Ops) {
10254 auto *SplatUndef = UndefValue::get(Ty);
10255 Ops.insert(Ops.begin(), SplatUndef);
10256}
10257
10258SmallVector<llvm::Type *, 2>
10259CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10260 llvm::Type *ResultType,
10261 ArrayRef<Value *> Ops) {
10262 if (TypeFlags.isOverloadNone())
10263 return {};
10264
10265 llvm::Type *DefaultType = getSVEType(TypeFlags);
10266
10267 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10268 return {DefaultType, Ops[1]->getType()};
10269
10270 if (TypeFlags.isOverloadWhileRW())
10271 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10272
10273 if (TypeFlags.isOverloadCvt())
10274 return {Ops[0]->getType(), Ops.back()->getType()};
10275
10276 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10277 ResultType->isVectorTy())
10278 return {ResultType, Ops[1]->getType()};
10279
10280 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10281 return {DefaultType};
10282}
10283
10284Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10285 llvm::Type *Ty,
10286 ArrayRef<Value *> Ops) {
10287 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10288 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10289
10290 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10291 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10292 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10293 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10294 I * SingleVecTy->getMinNumElements());
10295
10296 if (TypeFlags.isTupleSet())
10297 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10298 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10299}
10300
10301Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10302 llvm::Type *Ty,
10303 ArrayRef<Value *> Ops) {
10304 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10305
10306 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10307 unsigned MinElts = SrcTy->getMinNumElements();
10308 Value *Call = llvm::PoisonValue::get(Ty);
10309 for (unsigned I = 0; I < Ops.size(); I++) {
10310 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10311 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10312 }
10313
10314 return Call;
10315}
10316
10317Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10318 // Multi-vector results should be broken up into a single (wide) result
10319 // vector.
10320 auto *StructTy = dyn_cast<StructType>(Call->getType());
10321 if (!StructTy)
10322 return Call;
10323
10324 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10325 if (!VTy)
10326 return Call;
10327 unsigned N = StructTy->getNumElements();
10328
10329 // We may need to emit a cast to a svbool_t
10330 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10331 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10332
10333 ScalableVectorType *WideVTy =
10334 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10335 Value *Ret = llvm::PoisonValue::get(WideVTy);
10336 for (unsigned I = 0; I < N; ++I) {
10337 Value *SRet = Builder.CreateExtractValue(Call, I);
10338 assert(SRet->getType() == VTy && "Unexpected type for result value");
10339 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10340
10341 if (IsPredTy)
10342 SRet = EmitSVEPredicateCast(
10343 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10344
10345 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10346 }
10347 Call = Ret;
10348
10349 return Call;
10350}
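// Illustrative example (assumption, not from the original source): a struct
// result { <vscale x 4 x i32>, <vscale x 4 x i32> } from an x2 intrinsic is
// flattened here into a single <vscale x 8 x i32> via two insertvector steps.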
10351
10352void CodeGenFunction::GetAArch64SVEProcessedOperands(
10353 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10354 SVETypeFlags TypeFlags) {
10355 // Find out if any arguments are required to be integer constant expressions.
10356 unsigned ICEArguments = 0;
10357 ASTContext::GetBuiltinTypeError Error;
10358 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10359 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10360
10361 // Tuple set/get only requires one insert/extract vector, which is
10362 // created by EmitSVETupleSetOrGet.
10363 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10364
10365 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10366 bool IsICE = ICEArguments & (1 << i);
10367 Value *Arg = EmitScalarExpr(E->getArg(i));
10368
10369 if (IsICE) {
10370 // If this is required to be a constant, constant fold it so that we know
10371 // that the generated intrinsic gets a ConstantInt.
10372 std::optional<llvm::APSInt> Result =
10373 E->getArg(i)->getIntegerConstantExpr(getContext());
10374 assert(Result && "Expected argument to be a constant");
10375
10376 // Immediates for SVE LLVM intrinsics are always 32-bit. We can safely
10377 // truncate because the immediate has been range checked and no valid
10378 // immediate requires more than a handful of bits.
10379 *Result = Result->extOrTrunc(32);
10380 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10381 continue;
10382 }
10383
10384 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10385 Ops.push_back(Arg);
10386 continue;
10387 }
10388
10389 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10390 unsigned MinElts = VTy->getMinNumElements();
10391 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10392 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10393
10394 if (N == 1) {
10395 Ops.push_back(Arg);
10396 continue;
10397 }
10398
10399 for (unsigned I = 0; I < N; ++I) {
10400 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10401 auto *NewVTy =
10402 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10403 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10404 }
10405 }
10406}
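// For example (illustrative): a tuple argument of type <vscale x 8 x i32>
// (an svuint32x2_t) has N = 2 and is split into two <vscale x 4 x i32>
// halves, extracted at indices 0 and 4, before being passed to the intrinsic.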
10407
10408Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10409 const CallExpr *E) {
10410 llvm::Type *Ty = ConvertType(E->getType());
10411 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10412 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10413 Value *Val = EmitScalarExpr(E->getArg(0));
10414 return EmitSVEReinterpret(Val, Ty);
10415 }
10416
10417 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10418 AArch64SVEIntrinsicsProvenSorted);
10419
10420 llvm::SmallVector<Value *, 4> Ops;
10421 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10422 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10423
10424 if (TypeFlags.isLoad())
10425 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10426 TypeFlags.isZExtReturn());
10427 else if (TypeFlags.isStore())
10428 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10429 else if (TypeFlags.isGatherLoad())
10430 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10431 else if (TypeFlags.isScatterStore())
10432 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10433 else if (TypeFlags.isPrefetch())
10434 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10435 else if (TypeFlags.isGatherPrefetch())
10436 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10437 else if (TypeFlags.isStructLoad())
10438 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10439 else if (TypeFlags.isStructStore())
10440 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10441 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10442 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10443 else if (TypeFlags.isTupleCreate())
10444 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10445 else if (TypeFlags.isUndef())
10446 return UndefValue::get(Ty);
10447 else if (Builtin->LLVMIntrinsic != 0) {
10448 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10449 InsertExplicitZeroOperand(Builder, Ty, Ops);
10450
10451 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10452 InsertExplicitUndefOperand(Builder, Ty, Ops);
10453
10454 // Some ACLE builtins leave out the argument to specify the predicate
10455 // pattern, which is expected to be expanded to an SV_ALL pattern.
10456 if (TypeFlags.isAppendSVALL())
10457 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10458 if (TypeFlags.isInsertOp1SVALL())
10459 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10460
10461 // Predicates must match the main datatype.
10462 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10463 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10464 if (PredTy->getElementType()->isIntegerTy(1))
10465 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10466
10467 // Splat scalar operand to vector (intrinsics with _n infix)
10468 if (TypeFlags.hasSplatOperand()) {
10469 unsigned OpNo = TypeFlags.getSplatOperand();
10470 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10471 }
10472
10473 if (TypeFlags.isReverseCompare())
10474 std::swap(Ops[1], Ops[2]);
10475 else if (TypeFlags.isReverseUSDOT())
10476 std::swap(Ops[1], Ops[2]);
10477 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10478 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10479 std::swap(Ops[1], Ops[2]);
10480 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10481 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10482 std::swap(Ops[1], Ops[3]);
10483
10484 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10485 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10486 llvm::Type *OpndTy = Ops[1]->getType();
10487 auto *SplatZero = Constant::getNullValue(OpndTy);
10488 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10489 }
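 // e.g. (illustrative) for svadd_s32_z(pg, a, b) the inactive lanes of 'a'
 // are zeroed by this select, so the merging add intrinsic yields zero in
 // those lanes.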
10490
10491 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10492 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10493 Value *Call = Builder.CreateCall(F, Ops);
10494
10495 // Predicate results must be converted to svbool_t.
10496 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10497 if (PredTy->getScalarType()->isIntegerTy(1))
10498 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10499
10500 return FormSVEBuiltinResult(Call);
10501 }
10502
10503 switch (BuiltinID) {
10504 default:
10505 return nullptr;
10506
10507 case SVE::BI__builtin_sve_svreinterpret_b: {
10508 auto SVCountTy =
10509 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10510 Function *CastFromSVCountF =
10511 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10512 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10513 }
10514 case SVE::BI__builtin_sve_svreinterpret_c: {
10515 auto SVCountTy =
10516 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10517 Function *CastToSVCountF =
10518 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10519 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10520 }
10521
10522 case SVE::BI__builtin_sve_svpsel_lane_b8:
10523 case SVE::BI__builtin_sve_svpsel_lane_b16:
10524 case SVE::BI__builtin_sve_svpsel_lane_b32:
10525 case SVE::BI__builtin_sve_svpsel_lane_b64:
10526 case SVE::BI__builtin_sve_svpsel_lane_c8:
10527 case SVE::BI__builtin_sve_svpsel_lane_c16:
10528 case SVE::BI__builtin_sve_svpsel_lane_c32:
10529 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10530 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10531 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10532 "aarch64.svcount")) &&
10533 "Unexpected TargetExtType");
10534 auto SVCountTy =
10535 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10536 Function *CastFromSVCountF =
10537 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10538 Function *CastToSVCountF =
10539 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10540
10541 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10542 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10543 llvm::Value *Ops0 =
10544 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10545 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10546 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10547 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10548 }
10549 case SVE::BI__builtin_sve_svmov_b_z: {
10550 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10551 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10552 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10553 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10554 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10555 }
10556
10557 case SVE::BI__builtin_sve_svnot_b_z: {
10558 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10559 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10560 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10561 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10562 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10563 }
10564
10565 case SVE::BI__builtin_sve_svmovlb_u16:
10566 case SVE::BI__builtin_sve_svmovlb_u32:
10567 case SVE::BI__builtin_sve_svmovlb_u64:
10568 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10569
10570 case SVE::BI__builtin_sve_svmovlb_s16:
10571 case SVE::BI__builtin_sve_svmovlb_s32:
10572 case SVE::BI__builtin_sve_svmovlb_s64:
10573 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10574
10575 case SVE::BI__builtin_sve_svmovlt_u16:
10576 case SVE::BI__builtin_sve_svmovlt_u32:
10577 case SVE::BI__builtin_sve_svmovlt_u64:
10578 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10579
10580 case SVE::BI__builtin_sve_svmovlt_s16:
10581 case SVE::BI__builtin_sve_svmovlt_s32:
10582 case SVE::BI__builtin_sve_svmovlt_s64:
10583 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10584
10585 case SVE::BI__builtin_sve_svpmullt_u16:
10586 case SVE::BI__builtin_sve_svpmullt_u64:
10587 case SVE::BI__builtin_sve_svpmullt_n_u16:
10588 case SVE::BI__builtin_sve_svpmullt_n_u64:
10589 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10590
10591 case SVE::BI__builtin_sve_svpmullb_u16:
10592 case SVE::BI__builtin_sve_svpmullb_u64:
10593 case SVE::BI__builtin_sve_svpmullb_n_u16:
10594 case SVE::BI__builtin_sve_svpmullb_n_u64:
10595 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10596
10597 case SVE::BI__builtin_sve_svdup_n_b8:
10598 case SVE::BI__builtin_sve_svdup_n_b16:
10599 case SVE::BI__builtin_sve_svdup_n_b32:
10600 case SVE::BI__builtin_sve_svdup_n_b64: {
10601 Value *CmpNE =
10602 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10603 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10604 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10605 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10606 }
10607
10608 case SVE::BI__builtin_sve_svdupq_n_b8:
10609 case SVE::BI__builtin_sve_svdupq_n_b16:
10610 case SVE::BI__builtin_sve_svdupq_n_b32:
10611 case SVE::BI__builtin_sve_svdupq_n_b64:
10612 case SVE::BI__builtin_sve_svdupq_n_u8:
10613 case SVE::BI__builtin_sve_svdupq_n_s8:
10614 case SVE::BI__builtin_sve_svdupq_n_u64:
10615 case SVE::BI__builtin_sve_svdupq_n_f64:
10616 case SVE::BI__builtin_sve_svdupq_n_s64:
10617 case SVE::BI__builtin_sve_svdupq_n_u16:
10618 case SVE::BI__builtin_sve_svdupq_n_f16:
10619 case SVE::BI__builtin_sve_svdupq_n_bf16:
10620 case SVE::BI__builtin_sve_svdupq_n_s16:
10621 case SVE::BI__builtin_sve_svdupq_n_u32:
10622 case SVE::BI__builtin_sve_svdupq_n_f32:
10623 case SVE::BI__builtin_sve_svdupq_n_s32: {
10624 // These builtins are implemented by building a fixed-length vector from the
10625 // scalar arguments and broadcasting its 128-bit prefix with dupq_lane.
10626 unsigned NumOpnds = Ops.size();
10627
10628 bool IsBoolTy =
10629 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10630
10631 // For svdupq_n_b* the element type is an integer of width 128/numelts,
10632 // so that the compare can use the width that is natural for the expected
10633 // number of predicate lanes.
10634 llvm::Type *EltTy = Ops[0]->getType();
10635 if (IsBoolTy)
10636 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10637
10638 SmallVector<llvm::Value *, 16> VecOps;
10639 for (unsigned I = 0; I < NumOpnds; ++I)
10639 for (unsigned I = 0; I < NumOpnds; ++I)
10640 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10641 Value *Vec = BuildVector(VecOps);
10642
10643 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10644 Value *InsertSubVec = Builder.CreateInsertVector(
10645 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10646
10647 Function *F =
10648 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10649 Value *DupQLane =
10650 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10651
10652 if (!IsBoolTy)
10653 return DupQLane;
10654
10655 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10656 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10657
10658 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10659 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10660 : Intrinsic::aarch64_sve_cmpne_wide,
10661 OverloadedTy);
10662 Value *Call = Builder.CreateCall(
10663 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10664 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10665 }
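 // Illustrative sketch (assumption): svdupq_n_b8 arrives with 16 i1 arguments;
 // each is zero-extended to i8, packed into a fixed <16 x i8>, inserted into a
 // scalable vector, broadcast with aarch64.sve.dupq.lane, and finally compared
 // against zero (cmpne/cmpne_wide) to recover the predicate result.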
10666
10667 case SVE::BI__builtin_sve_svpfalse_b:
10668 return ConstantInt::getFalse(Ty);
10669
10670 case SVE::BI__builtin_sve_svpfalse_c: {
10671 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10672 Function *CastToSVCountF =
10673 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10674 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10675 }
10676
10677 case SVE::BI__builtin_sve_svlen_bf16:
10678 case SVE::BI__builtin_sve_svlen_f16:
10679 case SVE::BI__builtin_sve_svlen_f32:
10680 case SVE::BI__builtin_sve_svlen_f64:
10681 case SVE::BI__builtin_sve_svlen_s8:
10682 case SVE::BI__builtin_sve_svlen_s16:
10683 case SVE::BI__builtin_sve_svlen_s32:
10684 case SVE::BI__builtin_sve_svlen_s64:
10685 case SVE::BI__builtin_sve_svlen_u8:
10686 case SVE::BI__builtin_sve_svlen_u16:
10687 case SVE::BI__builtin_sve_svlen_u32:
10688 case SVE::BI__builtin_sve_svlen_u64: {
10689 SVETypeFlags TF(Builtin->TypeModifier);
10690 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10691 auto *NumEls =
10692 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10693
10694 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10695 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10696 }
10697
10698 case SVE::BI__builtin_sve_svtbl2_u8:
10699 case SVE::BI__builtin_sve_svtbl2_s8:
10700 case SVE::BI__builtin_sve_svtbl2_u16:
10701 case SVE::BI__builtin_sve_svtbl2_s16:
10702 case SVE::BI__builtin_sve_svtbl2_u32:
10703 case SVE::BI__builtin_sve_svtbl2_s32:
10704 case SVE::BI__builtin_sve_svtbl2_u64:
10705 case SVE::BI__builtin_sve_svtbl2_s64:
10706 case SVE::BI__builtin_sve_svtbl2_f16:
10707 case SVE::BI__builtin_sve_svtbl2_bf16:
10708 case SVE::BI__builtin_sve_svtbl2_f32:
10709 case SVE::BI__builtin_sve_svtbl2_f64: {
10710 SVETypeFlags TF(Builtin->TypeModifier);
10711 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10712 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10713 return Builder.CreateCall(F, Ops);
10714 }
10715
10716 case SVE::BI__builtin_sve_svset_neonq_s8:
10717 case SVE::BI__builtin_sve_svset_neonq_s16:
10718 case SVE::BI__builtin_sve_svset_neonq_s32:
10719 case SVE::BI__builtin_sve_svset_neonq_s64:
10720 case SVE::BI__builtin_sve_svset_neonq_u8:
10721 case SVE::BI__builtin_sve_svset_neonq_u16:
10722 case SVE::BI__builtin_sve_svset_neonq_u32:
10723 case SVE::BI__builtin_sve_svset_neonq_u64:
10724 case SVE::BI__builtin_sve_svset_neonq_f16:
10725 case SVE::BI__builtin_sve_svset_neonq_f32:
10726 case SVE::BI__builtin_sve_svset_neonq_f64:
10727 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10728 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10729 }
10730
10731 case SVE::BI__builtin_sve_svget_neonq_s8:
10732 case SVE::BI__builtin_sve_svget_neonq_s16:
10733 case SVE::BI__builtin_sve_svget_neonq_s32:
10734 case SVE::BI__builtin_sve_svget_neonq_s64:
10735 case SVE::BI__builtin_sve_svget_neonq_u8:
10736 case SVE::BI__builtin_sve_svget_neonq_u16:
10737 case SVE::BI__builtin_sve_svget_neonq_u32:
10738 case SVE::BI__builtin_sve_svget_neonq_u64:
10739 case SVE::BI__builtin_sve_svget_neonq_f16:
10740 case SVE::BI__builtin_sve_svget_neonq_f32:
10741 case SVE::BI__builtin_sve_svget_neonq_f64:
10742 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10743 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10744 }
10745
10746 case SVE::BI__builtin_sve_svdup_neonq_s8:
10747 case SVE::BI__builtin_sve_svdup_neonq_s16:
10748 case SVE::BI__builtin_sve_svdup_neonq_s32:
10749 case SVE::BI__builtin_sve_svdup_neonq_s64:
10750 case SVE::BI__builtin_sve_svdup_neonq_u8:
10751 case SVE::BI__builtin_sve_svdup_neonq_u16:
10752 case SVE::BI__builtin_sve_svdup_neonq_u32:
10753 case SVE::BI__builtin_sve_svdup_neonq_u64:
10754 case SVE::BI__builtin_sve_svdup_neonq_f16:
10755 case SVE::BI__builtin_sve_svdup_neonq_f32:
10756 case SVE::BI__builtin_sve_svdup_neonq_f64:
10757 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10758 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10759 Builder.getInt64(0));
10760 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10761 {Insert, Builder.getInt64(0)});
10762 }
10763 }
10764
10765 /// Should not happen
10766 return nullptr;
10767}
10768
10769static void swapCommutativeSMEOperands(unsigned BuiltinID,
10770 SmallVectorImpl<Value *> &Ops) {
10771 unsigned MultiVec;
10772 switch (BuiltinID) {
10773 default:
10774 return;
10775 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10776 MultiVec = 1;
10777 break;
10778 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10779 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10780 MultiVec = 2;
10781 break;
10782 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10783 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10784 MultiVec = 4;
10785 break;
10786 }
10787
10788 if (MultiVec > 0)
10789 for (unsigned I = 0; I < MultiVec; ++I)
10790 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10791}
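// For example (assumption based on the swap above): svsudot takes its
// multi-vector operands in (signed, unsigned) order, while the underlying
// usdot intrinsic expects (unsigned, signed), so the two operand groups are
// exchanged before the call is emitted.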
10792
10793Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10794 const CallExpr *E) {
10795 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10796 AArch64SMEIntrinsicsProvenSorted);
10797
10798 llvm::SmallVector<Value *, 4> Ops;
10799 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10800 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10801
10802 if (TypeFlags.isLoad() || TypeFlags.isStore())
10803 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10804 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10805 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10806 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10807 BuiltinID == SME::BI__builtin_sme_svzero_za)
10808 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10809 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10810 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10811 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10812 BuiltinID == SME::BI__builtin_sme_svstr_za)
10813 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10814
10815 // Handle builtins which require their multi-vector operands to be swapped
10816 swapCommutativeSMEOperands(BuiltinID, Ops);
10817
10818 // Should not happen!
10819 if (Builtin->LLVMIntrinsic == 0)
10820 return nullptr;
10821
10822 // Predicates must match the main datatype.
10823 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10824 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10825 if (PredTy->getElementType()->isIntegerTy(1))
10826 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10827
10828 Function *F =
10829 TypeFlags.isOverloadNone()
10830 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10831 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10832 Value *Call = Builder.CreateCall(F, Ops);
10833
10834 return FormSVEBuiltinResult(Call);
10835}
10836
10837Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10838 const CallExpr *E,
10839 llvm::Triple::ArchType Arch) {
10840 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10841 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10842 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10843
10844 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10845 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10846 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10847
10848 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10849 return EmitAArch64CpuSupports(E);
10850
10851 unsigned HintID = static_cast<unsigned>(-1);
10852 switch (BuiltinID) {
10853 default: break;
10854 case clang::AArch64::BI__builtin_arm_nop:
10855 HintID = 0;
10856 break;
10857 case clang::AArch64::BI__builtin_arm_yield:
10858 case clang::AArch64::BI__yield:
10859 HintID = 1;
10860 break;
10861 case clang::AArch64::BI__builtin_arm_wfe:
10862 case clang::AArch64::BI__wfe:
10863 HintID = 2;
10864 break;
10865 case clang::AArch64::BI__builtin_arm_wfi:
10866 case clang::AArch64::BI__wfi:
10867 HintID = 3;
10868 break;
10869 case clang::AArch64::BI__builtin_arm_sev:
10870 case clang::AArch64::BI__sev:
10871 HintID = 4;
10872 break;
10873 case clang::AArch64::BI__builtin_arm_sevl:
10874 case clang::AArch64::BI__sevl:
10875 HintID = 5;
10876 break;
10877 }
10878
10879 if (HintID != static_cast<unsigned>(-1)) {
10880 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10881 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10882 }
10883
10884 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10885 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10886 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10887 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10888 }
10889
10890 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10891 // Create call to __arm_sme_state and store the results to the two pointers.
10892 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10893 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10894 false),
10895 "__arm_sme_state"));
10896 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10897 "aarch64_pstate_sm_compatible");
10898 CI->setAttributes(Attrs);
10899 CI->setCallingConv(
10900 llvm::CallingConv::
10901 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10902 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10903 EmitPointerWithAlignment(E->getArg(0)));
10904 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10905 EmitPointerWithAlignment(E->getArg(1)));
10906 }
10907
10908 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10909 assert((getContext().getTypeSize(E->getType()) == 32) &&
10910 "rbit of unusual size!");
10911 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10912 return Builder.CreateCall(
10913 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10914 }
10915 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10916 assert((getContext().getTypeSize(E->getType()) == 64) &&
10917 "rbit of unusual size!");
10918 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10919 return Builder.CreateCall(
10920 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10921 }
10922
10923 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10924 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10925 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10926 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10927 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10928 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10929 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10930 return Res;
10931 }
10932
10933 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10934 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10935 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10936 "cls");
10937 }
10938 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10939 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10940 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10941 "cls");
10942 }
10943
10944 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10945 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10946 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10947 llvm::Type *Ty = Arg->getType();
10948 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10949 Arg, "frint32z");
10950 }
10951
10952 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10953 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10954 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10955 llvm::Type *Ty = Arg->getType();
10956 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10957 Arg, "frint64z");
10958 }
10959
10960 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10961 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10962 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10963 llvm::Type *Ty = Arg->getType();
10964 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10965 Arg, "frint32x");
10966 }
10967
10968 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10969 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10970 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10971 llvm::Type *Ty = Arg->getType();
10972 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10973 Arg, "frint64x");
10974 }
10975
10976 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10977 assert((getContext().getTypeSize(E->getType()) == 32) &&
10978 "__jcvt of unusual size!");
10979 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10980 return Builder.CreateCall(
10981 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10982 }
10983
10984 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10985 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10986 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10987 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10988 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
10989 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
10990
10991 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10992 // Load from the address via an LLVM intrinsic, receiving a
10993 // tuple of 8 i64 words, and store each one to ValPtr.
10994 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10995 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
10996 llvm::Value *ToRet;
10997 for (size_t i = 0; i < 8; i++) {
10998 llvm::Value *ValOffsetPtr =
10999 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11000 Address Addr =
11001 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11002 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11003 }
11004 return ToRet;
11005 } else {
11006 // Load 8 i64 words from ValPtr, and store them to the address
11007 // via an LLVM intrinsic.
11008 SmallVector<llvm::Value *, 9> Args;
11009 Args.push_back(MemAddr);
11010 for (size_t i = 0; i < 8; i++) {
11011 llvm::Value *ValOffsetPtr =
11012 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11013 Address Addr =
11014 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11015 Args.push_back(Builder.CreateLoad(Addr));
11016 }
11017
11018 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11019 ? Intrinsic::aarch64_st64b
11020 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11021 ? Intrinsic::aarch64_st64bv
11022 : Intrinsic::aarch64_st64bv0);
11023 Function *F = CGM.getIntrinsic(Intr);
11024 return Builder.CreateCall(F, Args);
11025 }
11026 }
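 // Usage sketch (illustrative): __builtin_arm_ld64b(addr, vals) fills an
 // 8-element uint64_t array from a 64-byte block at 'addr', while the st64b
 // variants load the 8 words from 'vals' and pass them, together with the
 // address, to a single store intrinsic call.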
11027
11028 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11029 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11030
11031 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11032 ? Intrinsic::aarch64_rndr
11033 : Intrinsic::aarch64_rndrrs);
11034 Function *F = CGM.getIntrinsic(Intr);
11035 llvm::Value *Val = Builder.CreateCall(F);
11036 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11037 Value *Status = Builder.CreateExtractValue(Val, 1);
11038
11039 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11040 Builder.CreateStore(RandomValue, MemAddress);
11041 Status = Builder.CreateZExt(Status, Int32Ty);
11042 return Status;
11043 }
11044
11045 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11046 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11047 const FunctionDecl *FD = E->getDirectCallee();
11048 Value *Ops[2];
11049 for (unsigned i = 0; i < 2; i++)
11050 Ops[i] = EmitScalarExpr(E->getArg(i));
11051 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11052 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11053 StringRef Name = FD->getName();
11054 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11055 }
11056
11057 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11058 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11059 getContext().getTypeSize(E->getType()) == 128) {
11060 Function *F =
11061 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11062 ? Intrinsic::aarch64_ldaxp
11063 : Intrinsic::aarch64_ldxp);
11064
11065 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11066 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11067
11068 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11069 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11070 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11071 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11072 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11073
11074 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11075 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11076 Val = Builder.CreateOr(Val, Val1);
11077 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11078 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11079 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11080 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11081
11082 QualType Ty = E->getType();
11083 llvm::Type *RealResTy = ConvertType(Ty);
11084 llvm::Type *IntTy =
11085 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11086
11087 Function *F =
11088 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11089 ? Intrinsic::aarch64_ldaxr
11090 : Intrinsic::aarch64_ldxr,
11091 UnqualPtrTy);
11092 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11093 Val->addParamAttr(
11094 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11095
11096 if (RealResTy->isPointerTy())
11097 return Builder.CreateIntToPtr(Val, RealResTy);
11098
11099 llvm::Type *IntResTy = llvm::IntegerType::get(
11100 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11101 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11102 RealResTy);
11103 }
11104
11105 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11106 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11107 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11108 Function *F =
11109 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11110 ? Intrinsic::aarch64_stlxp
11111 : Intrinsic::aarch64_stxp);
11112 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11113
11114 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11115 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11116
11117 Tmp = Tmp.withElementType(STy);
11118 llvm::Value *Val = Builder.CreateLoad(Tmp);
11119
11120 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11121 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11122 Value *StPtr = EmitScalarExpr(E->getArg(1));
11123 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11124 }
11125
11126 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11127 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11128 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11129 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11130
11131 QualType Ty = E->getArg(0)->getType();
11132 llvm::Type *StoreTy =
11133 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11134
11135 if (StoreVal->getType()->isPointerTy())
11136 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11137 else {
11138 llvm::Type *IntTy = llvm::IntegerType::get(
11139 getLLVMContext(),
11140 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11141 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11142 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11143 }
11144
11145 Function *F =
11146 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11147 ? Intrinsic::aarch64_stlxr
11148 : Intrinsic::aarch64_stxr,
11149 StoreAddr->getType());
11150 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11151 CI->addParamAttr(
11152 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11153 return CI;
11154 }
11155
11156 if (BuiltinID == clang::AArch64::BI__getReg) {
11157 Expr::EvalResult Result;
11158 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11159 llvm_unreachable("Sema will ensure that the parameter is constant");
11160
11161 llvm::APSInt Value = Result.Val.getInt();
11162 LLVMContext &Context = CGM.getLLVMContext();
11163 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11164
11165 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11166 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11167 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11168
11169 llvm::Function *F =
11170 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11171 return Builder.CreateCall(F, Metadata);
11172 }
11173
11174 if (BuiltinID == clang::AArch64::BI__break) {
11175 Expr::EvalResult Result;
11176 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11177 llvm_unreachable("Sema will ensure that the parameter is constant");
11178
11179 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11180 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11181 }
11182
11183 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11184 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11185 return Builder.CreateCall(F);
11186 }
11187
11188 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11189 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11190 llvm::SyncScope::SingleThread);
11191
11192 // CRC32
11193 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11194 switch (BuiltinID) {
11195 case clang::AArch64::BI__builtin_arm_crc32b:
11196 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11197 case clang::AArch64::BI__builtin_arm_crc32cb:
11198 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11199 case clang::AArch64::BI__builtin_arm_crc32h:
11200 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11201 case clang::AArch64::BI__builtin_arm_crc32ch:
11202 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11203 case clang::AArch64::BI__builtin_arm_crc32w:
11204 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11205 case clang::AArch64::BI__builtin_arm_crc32cw:
11206 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11207 case clang::AArch64::BI__builtin_arm_crc32d:
11208 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11209 case clang::AArch64::BI__builtin_arm_crc32cd:
11210 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11211 }
11212
11213 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11214 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11215 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11216 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11217
11218 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11219 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11220
11221 return Builder.CreateCall(F, {Arg0, Arg1});
11222 }
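// Note: each __builtin_arm_crc32* call becomes the matching llvm.aarch64.crc32*
// intrinsic, and the data operand is zero-extended to the intrinsic's parameter
// type (i32 for the b/h/w forms, i64 for the d forms, which map to
// crc32x/crc32cx). Illustrative sketch (ACLE spellings, assuming <arm_acle.h>
// and FEAT_CRC32):
//   uint32_t acc = 0xFFFFFFFFu;
//   acc = __crc32b(acc, (uint8_t)data);    // -> llvm.aarch64.crc32b
//   acc = __crc32cd(acc, (uint64_t)data);  // -> llvm.aarch64.crc32cx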
11223
11224 // Memory Operations (MOPS)
11225 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11226 Value *Dst = EmitScalarExpr(E->getArg(0));
11227 Value *Val = EmitScalarExpr(E->getArg(1));
11228 Value *Size = EmitScalarExpr(E->getArg(2));
11229 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11230 Val = Builder.CreateTrunc(Val, Int8Ty);
11231 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11232 return Builder.CreateCall(
11233 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11234 }
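// Note: the MOPS builtin above normalizes its operands to (i8* dst, i8 value,
// i64 size) before calling llvm.aarch64.mops.memset.tag, which sets both the
// memory contents and the MTE allocation tags of the destination. Usage sketch
// (argument order as handled above: destination, value, size; names are
// illustrative):
//   __builtin_arm_mops_memset_tag(tagged_buf, 0, 64);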
11235
11236 // Memory Tagging Extensions (MTE) Intrinsics
11237 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11238 switch (BuiltinID) {
11239 case clang::AArch64::BI__builtin_arm_irg:
11240 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11241 case clang::AArch64::BI__builtin_arm_addg:
11242 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11243 case clang::AArch64::BI__builtin_arm_gmi:
11244 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11245 case clang::AArch64::BI__builtin_arm_ldg:
11246 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11247 case clang::AArch64::BI__builtin_arm_stg:
11248 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11249 case clang::AArch64::BI__builtin_arm_subp:
11250 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11251 }
11252
11253 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11254 llvm::Type *T = ConvertType(E->getType());
11255
11256 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11257 Value *Pointer = EmitScalarExpr(E->getArg(0));
11258 Value *Mask = EmitScalarExpr(E->getArg(1));
11259
11260 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11261 Mask = Builder.CreateZExt(Mask, Int64Ty);
11262 Value *RV = Builder.CreateCall(
11263 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11264 return Builder.CreatePointerCast(RV, T);
11265 }
11266 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11267 Value *Pointer = EmitScalarExpr(E->getArg(0));
11268 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11269
11270 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11271 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11272 Value *RV = Builder.CreateCall(
11273 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11274 return Builder.CreatePointerCast(RV, T);
11275 }
11276 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11277 Value *Pointer = EmitScalarExpr(E->getArg(0));
11278 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11279
11280 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11281 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11282 return Builder.CreateCall(
11283 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11284 }
11285 // Although it is possible to supply a different return
11286 // address (first arg) to this intrinsic, for now we set
11287 // return address same as input address.
11288 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11289 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11290 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11291 Value *RV = Builder.CreateCall(
11292 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11293 return Builder.CreatePointerCast(RV, T);
11294 }
11295 // Although it is possible to supply a different tag (to set)
11296 // to this intrinsic (as first arg), for now we supply
11297 // the tag that is in input address arg (common use case).
11298 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11299 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11300 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11301 return Builder.CreateCall(
11302 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11303 }
11304 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11305 Value *PointerA = EmitScalarExpr(E->getArg(0));
11306 Value *PointerB = EmitScalarExpr(E->getArg(1));
11307 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11308 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11309 return Builder.CreateCall(
11310 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11311 }
11312 }
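// Note: all of the MTE cases above operate on i8* values: pointer operands are
// cast to i8*, integer operands are zero-extended to i64, and the intrinsic
// result is cast back to the expression's pointer type where one is expected
// (irg, addg, ldg). For ldg and stg the same pointer is currently passed as
// both the tag source and the address operand, as the comments above explain.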
11313
11314 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11315 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11316 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11317 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11318 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11319 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11320 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11321 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11322
11323 SpecialRegisterAccessKind AccessKind = Write;
11324 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11325 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11326 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11327 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11328 AccessKind = VolatileRead;
11329
11330 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11331 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11332
11333 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11334 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11335
11336 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11337 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11338
11339 llvm::Type *ValueType;
11340 llvm::Type *RegisterType = Int64Ty;
11341 if (Is32Bit) {
11342 ValueType = Int32Ty;
11343 } else if (Is128Bit) {
11344 llvm::Type *Int128Ty =
11345 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11346 ValueType = Int128Ty;
11347 RegisterType = Int128Ty;
11348 } else if (IsPointerBuiltin) {
11349 ValueType = VoidPtrTy;
11350 } else {
11351 ValueType = Int64Ty;
11352 };
11353
11354 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11355 AccessKind);
11356 }
11357
11358 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11359 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11360 LLVMContext &Context = CGM.getLLVMContext();
11361
11362 unsigned SysReg =
11363 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11364
11365 std::string SysRegStr;
11366 llvm::raw_string_ostream(SysRegStr) <<
11367 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11368 ((SysReg >> 11) & 7) << ":" <<
11369 ((SysReg >> 7) & 15) << ":" <<
11370 ((SysReg >> 3) & 15) << ":" <<
11371 ( SysReg & 7);
11372
11373 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11374 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11375 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11376
11377 llvm::Type *RegisterType = Int64Ty;
11378 llvm::Type *Types[] = { RegisterType };
11379
11380 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11381 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11382
11383 return Builder.CreateCall(F, Metadata);
11384 }
11385
11386 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11387 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11388
11389 return Builder.CreateCall(F, { Metadata, ArgValue });
11390 }
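// Note: _ReadStatusReg/_WriteStatusReg take an MSVC ARM64_SYSREG-style 16-bit
// encoding and rebuild the "op0:op1:CRn:CRm:op2" name that
// llvm.read_register/llvm.write_register expect. Bits [14], [13:11], [10:7],
// [6:3] and [2:0] hold op0 (with an implied leading 1 bit), op1, CRn, CRm and
// op2. Worked example (encoding assumed for illustration): TPIDR_EL0 is
// op0=3, op1=3, CRn=13, CRm=0, op2=2, i.e. encoding 0x5E82, which the code
// above turns into the string "3:3:13:0:2".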
11391
11392 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11393 llvm::Function *F =
11394 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11395 return Builder.CreateCall(F);
11396 }
11397
11398 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11399 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11400 return Builder.CreateCall(F);
11401 }
11402
11403 if (BuiltinID == clang::AArch64::BI__mulh ||
11404 BuiltinID == clang::AArch64::BI__umulh) {
11405 llvm::Type *ResType = ConvertType(E->getType());
11406 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11407
11408 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11409 Value *LHS =
11410 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11411 Value *RHS =
11412 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11413
11414 Value *MulResult, *HigherBits;
11415 if (IsSigned) {
11416 MulResult = Builder.CreateNSWMul(LHS, RHS);
11417 HigherBits = Builder.CreateAShr(MulResult, 64);
11418 } else {
11419 MulResult = Builder.CreateNUWMul(LHS, RHS);
11420 HigherBits = Builder.CreateLShr(MulResult, 64);
11421 }
11422 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11423
11424 return HigherBits;
11425 }
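// Note: __mulh/__umulh return the high 64 bits of a 64x64-bit multiply: both
// operands are widened to i128, multiplied, shifted right by 64, and the high
// half is truncated back to the result type. For example,
// __umulh(0x8000000000000000ULL, 2) evaluates to 1.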
11426
11427 if (BuiltinID == AArch64::BI__writex18byte ||
11428 BuiltinID == AArch64::BI__writex18word ||
11429 BuiltinID == AArch64::BI__writex18dword ||
11430 BuiltinID == AArch64::BI__writex18qword) {
11431 // Read x18 as i8*
11432 LLVMContext &Context = CGM.getLLVMContext();
11433 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11434 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11435 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11436 llvm::Function *F =
11437 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11438 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11439 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11440
11441 // Store val at x18 + offset
11442 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11443 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11444 Value *Val = EmitScalarExpr(E->getArg(1));
11445 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11446 return Store;
11447 }
11448
11449 if (BuiltinID == AArch64::BI__readx18byte ||
11450 BuiltinID == AArch64::BI__readx18word ||
11451 BuiltinID == AArch64::BI__readx18dword ||
11452 BuiltinID == AArch64::BI__readx18qword) {
11453 llvm::Type *IntTy = ConvertType(E->getType());
11454
11455 // Read x18 as i8*
11456 LLVMContext &Context = CGM.getLLVMContext();
11457 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11458 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11459 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11460 llvm::Function *F =
11461 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11462 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11463 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11464
11465 // Load x18 + offset
11466 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11467 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11468 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11469 return Load;
11470 }
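// Note: the __readx18*/__writex18* intrinsics read x18 via llvm.read_register,
// treat it as a byte pointer, and load or store at the given byte offset with
// alignment 1. Illustrative sketch (MSVC-style usage; on Windows on AArch64,
// x18 conventionally holds the TEB pointer):
//   unsigned char b = __readx18byte(0x08);
//   __writex18dword(0x10, 0xDEADBEEFu);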
11471
11472 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11473 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11474 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11475 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11476 Value *Arg = EmitScalarExpr(E->getArg(0));
11477 llvm::Type *RetTy = ConvertType(E->getType());
11478 return Builder.CreateBitCast(Arg, RetTy);
11479 }
11480
11481 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11482 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11483 BuiltinID == AArch64::BI_CountLeadingZeros ||
11484 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11485 Value *Arg = EmitScalarExpr(E->getArg(0));
11486 llvm::Type *ArgType = Arg->getType();
11487
11488 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11489 BuiltinID == AArch64::BI_CountLeadingOnes64)
11490 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11491
11492 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11493 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11494
11495 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11496 BuiltinID == AArch64::BI_CountLeadingZeros64)
11497 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11498 return Result;
11499 }
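// Note: _CountLeadingOnes* is implemented as ctlz of the bitwise complement,
// and the 64-bit variants truncate the result to the 32-bit return type. For
// example, _CountLeadingOnes(0xF0000000u) yields 4, because the complement
// 0x0FFFFFFFu has four leading zero bits.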
11500
11501 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11502 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11503 Value *Arg = EmitScalarExpr(E->getArg(0));
11504
11505 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11506 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11507 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11508
11509 Value *Result = Builder.CreateCall(F, Arg, "cls");
11510 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11511 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11512 return Result;
11513 }
11514
11515 if (BuiltinID == AArch64::BI_CountOneBits ||
11516 BuiltinID == AArch64::BI_CountOneBits64) {
11517 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11518 llvm::Type *ArgType = ArgValue->getType();
11519 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11520
11521 Value *Result = Builder.CreateCall(F, ArgValue);
11522 if (BuiltinID == AArch64::BI_CountOneBits64)
11523 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11524 return Result;
11525 }
11526
11527 if (BuiltinID == AArch64::BI__prefetch) {
11528 Value *Address = EmitScalarExpr(E->getArg(0));
11529 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11530 Value *Locality = ConstantInt::get(Int32Ty, 3);
11531 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11532 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11533 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11534 }
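// Note: __prefetch(p) is lowered to llvm.prefetch(p, /*rw=*/0, /*locality=*/3,
// /*cache type=*/1), i.e. a read prefetch into the data cache with maximal
// temporal locality.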
11535
11536 // Handle MSVC intrinsics before argument evaluation to prevent double
11537 // evaluation.
11538 if (std::optional<MSVCIntrin> MsvcIntId =
11539 translateAarch64ToMsvcIntrin(BuiltinID))
11540 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11541
11542 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11543 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11544 return P.first == BuiltinID;
11545 });
11546 if (It != end(NEONEquivalentIntrinsicMap))
11547 BuiltinID = It->second;
11548
11549 // Find out if any arguments are required to be integer constant
11550 // expressions.
11551 unsigned ICEArguments = 0;
11552 ASTContext::GetBuiltinTypeError Error;
11553 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11554 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11555
11556 llvm::SmallVector<Value*, 4> Ops;
11557 Address PtrOp0 = Address::invalid();
11558 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11559 if (i == 0) {
11560 switch (BuiltinID) {
11561 case NEON::BI__builtin_neon_vld1_v:
11562 case NEON::BI__builtin_neon_vld1q_v:
11563 case NEON::BI__builtin_neon_vld1_dup_v:
11564 case NEON::BI__builtin_neon_vld1q_dup_v:
11565 case NEON::BI__builtin_neon_vld1_lane_v:
11566 case NEON::BI__builtin_neon_vld1q_lane_v:
11567 case NEON::BI__builtin_neon_vst1_v:
11568 case NEON::BI__builtin_neon_vst1q_v:
11569 case NEON::BI__builtin_neon_vst1_lane_v:
11570 case NEON::BI__builtin_neon_vst1q_lane_v:
11571 case NEON::BI__builtin_neon_vldap1_lane_s64:
11572 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11573 case NEON::BI__builtin_neon_vstl1_lane_s64:
11574 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11575 // Get the alignment for the argument in addition to the value;
11576 // we'll use it later.
11577 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11578 Ops.push_back(PtrOp0.emitRawPointer(*this));
11579 continue;
11580 }
11581 }
11582 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11583 }
11584
11585 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11586 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11587 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11588
11589 if (Builtin) {
11590 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11591 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11592 assert(Result && "SISD intrinsic should have been handled");
11593 return Result;
11594 }
11595
11596 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11597 NeonTypeFlags Type(0);
11598 if (std::optional<llvm::APSInt> Result =
11599 Arg->getIntegerConstantExpr(getContext()))
11600 // Determine the type of this overloaded NEON intrinsic.
11601 Type = NeonTypeFlags(Result->getZExtValue());
11602
11603 bool usgn = Type.isUnsigned();
11604 bool quad = Type.isQuad();
11605
11606 // Handle non-overloaded intrinsics first.
11607 switch (BuiltinID) {
11608 default: break;
11609 case NEON::BI__builtin_neon_vabsh_f16:
11610 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11611 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11612 case NEON::BI__builtin_neon_vaddq_p128: {
11613 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11614 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11615 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11616 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11617 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11618 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11619 return Builder.CreateBitCast(Ops[0], Int128Ty);
11620 }
11621 case NEON::BI__builtin_neon_vldrq_p128: {
11622 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11623 Value *Ptr = EmitScalarExpr(E->getArg(0));
11624 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11625 CharUnits::fromQuantity(16));
11626 }
11627 case NEON::BI__builtin_neon_vstrq_p128: {
11628 Value *Ptr = Ops[0];
11629 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11630 }
11631 case NEON::BI__builtin_neon_vcvts_f32_u32:
11632 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11633 usgn = true;
11634 [[fallthrough]];
11635 case NEON::BI__builtin_neon_vcvts_f32_s32:
11636 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11637 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11638 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11639 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11640 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11641 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11642 if (usgn)
11643 return Builder.CreateUIToFP(Ops[0], FTy);
11644 return Builder.CreateSIToFP(Ops[0], FTy);
11645 }
11646 case NEON::BI__builtin_neon_vcvth_f16_u16:
11647 case NEON::BI__builtin_neon_vcvth_f16_u32:
11648 case NEON::BI__builtin_neon_vcvth_f16_u64:
11649 usgn = true;
11650 [[fallthrough]];
11651 case NEON::BI__builtin_neon_vcvth_f16_s16:
11652 case NEON::BI__builtin_neon_vcvth_f16_s32:
11653 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11654 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11655 llvm::Type *FTy = HalfTy;
11656 llvm::Type *InTy;
11657 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11658 InTy = Int64Ty;
11659 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11660 InTy = Int32Ty;
11661 else
11662 InTy = Int16Ty;
11663 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11664 if (usgn)
11665 return Builder.CreateUIToFP(Ops[0], FTy);
11666 return Builder.CreateSIToFP(Ops[0], FTy);
11667 }
11668 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11669 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11670 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11671 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11672 case NEON::BI__builtin_neon_vcvth_u16_f16:
11673 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11674 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11675 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11676 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11677 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11678 unsigned Int;
11679 llvm::Type* InTy = Int32Ty;
11680 llvm::Type* FTy = HalfTy;
11681 llvm::Type *Tys[2] = {InTy, FTy};
11682 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11683 switch (BuiltinID) {
11684 default: llvm_unreachable("missing builtin ID in switch!");
11685 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11686 Int = Intrinsic::aarch64_neon_fcvtau; break;
11687 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11688 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11689 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11690 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11691 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11692 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11693 case NEON::BI__builtin_neon_vcvth_u16_f16:
11694 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11695 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11696 Int = Intrinsic::aarch64_neon_fcvtas; break;
11697 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11698 Int = Intrinsic::aarch64_neon_fcvtms; break;
11699 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11700 Int = Intrinsic::aarch64_neon_fcvtns; break;
11701 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11702 Int = Intrinsic::aarch64_neon_fcvtps; break;
11703 case NEON::BI__builtin_neon_vcvth_s16_f16:
11704 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11705 }
11706 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11707 return Builder.CreateTrunc(Ops[0], Int16Ty);
11708 }
11709 case NEON::BI__builtin_neon_vcaleh_f16:
11710 case NEON::BI__builtin_neon_vcalth_f16:
11711 case NEON::BI__builtin_neon_vcageh_f16:
11712 case NEON::BI__builtin_neon_vcagth_f16: {
11713 unsigned Int;
11714 llvm::Type* InTy = Int32Ty;
11715 llvm::Type* FTy = HalfTy;
11716 llvm::Type *Tys[2] = {InTy, FTy};
11717 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11718 switch (BuiltinID) {
11719 default: llvm_unreachable("missing builtin ID in switch!");
11720 case NEON::BI__builtin_neon_vcageh_f16:
11721 Int = Intrinsic::aarch64_neon_facge; break;
11722 case NEON::BI__builtin_neon_vcagth_f16:
11723 Int = Intrinsic::aarch64_neon_facgt; break;
11724 case NEON::BI__builtin_neon_vcaleh_f16:
11725 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11726 case NEON::BI__builtin_neon_vcalth_f16:
11727 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11728 }
11729 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11730 return Builder.CreateTrunc(Ops[0], Int16Ty);
11731 }
11732 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11733 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11734 unsigned Int;
11735 llvm::Type* InTy = Int32Ty;
11736 llvm::Type* FTy = HalfTy;
11737 llvm::Type *Tys[2] = {InTy, FTy};
11738 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11739 switch (BuiltinID) {
11740 default: llvm_unreachable("missing builtin ID in switch!");
11741 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11742 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11743 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11744 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11745 }
11746 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11747 return Builder.CreateTrunc(Ops[0], Int16Ty);
11748 }
11749 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11750 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11751 unsigned Int;
11752 llvm::Type* FTy = HalfTy;
11753 llvm::Type* InTy = Int32Ty;
11754 llvm::Type *Tys[2] = {FTy, InTy};
11755 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11756 switch (BuiltinID) {
11757 default: llvm_unreachable("missing builtin ID in switch!");
11758 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11759 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11760 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11761 break;
11762 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11763 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11764 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11765 break;
11766 }
11767 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11768 }
11769 case NEON::BI__builtin_neon_vpaddd_s64: {
11770 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11771 Value *Vec = EmitScalarExpr(E->getArg(0));
11772 // The vector is v2i64, so make sure it's bitcast to that.
11773 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11774 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11775 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11776 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11777 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11778 // Pairwise addition of a v2i64 into a scalar i64.
11779 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11780 }
11781 case NEON::BI__builtin_neon_vpaddd_f64: {
11782 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11783 Value *Vec = EmitScalarExpr(E->getArg(0));
11784 // The vector is v2f64, so make sure it's bitcast to that.
11785 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11786 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11787 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11788 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11789 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11790 // Pairwise addition of a v2f64 into a scalar f64.
11791 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11792 }
11793 case NEON::BI__builtin_neon_vpadds_f32: {
11794 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11795 Value *Vec = EmitScalarExpr(E->getArg(0));
11796 // The vector is v2f32, so make sure it's bitcast to that.
11797 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11798 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11799 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11800 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11801 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11802 // Pairwise addition of a v2f32 into a scalar f32.
11803 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11804 }
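// Note: the three scalar pairwise-add cases above all follow the same shape:
// bitcast the input to a two-element vector, extract lane 0 and lane 1, and
// add them with a plain add/fadd rather than a target intrinsic.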
11805 case NEON::BI__builtin_neon_vceqzd_s64:
11806 case NEON::BI__builtin_neon_vceqzd_f64:
11807 case NEON::BI__builtin_neon_vceqzs_f32:
11808 case NEON::BI__builtin_neon_vceqzh_f16:
11809 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11810 return EmitAArch64CompareBuiltinExpr(
11811 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11812 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11813 case NEON::BI__builtin_neon_vcgezd_s64:
11814 case NEON::BI__builtin_neon_vcgezd_f64:
11815 case NEON::BI__builtin_neon_vcgezs_f32:
11816 case NEON::BI__builtin_neon_vcgezh_f16:
11817 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11818 return EmitAArch64CompareBuiltinExpr(
11819 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11820 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11821 case NEON::BI__builtin_neon_vclezd_s64:
11822 case NEON::BI__builtin_neon_vclezd_f64:
11823 case NEON::BI__builtin_neon_vclezs_f32:
11824 case NEON::BI__builtin_neon_vclezh_f16:
11825 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11826 return EmitAArch64CompareBuiltinExpr(
11827 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11828 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11829 case NEON::BI__builtin_neon_vcgtzd_s64:
11830 case NEON::BI__builtin_neon_vcgtzd_f64:
11831 case NEON::BI__builtin_neon_vcgtzs_f32:
11832 case NEON::BI__builtin_neon_vcgtzh_f16:
11833 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11834 return EmitAArch64CompareBuiltinExpr(
11835 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11836 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11837 case NEON::BI__builtin_neon_vcltzd_s64:
11838 case NEON::BI__builtin_neon_vcltzd_f64:
11839 case NEON::BI__builtin_neon_vcltzs_f32:
11840 case NEON::BI__builtin_neon_vcltzh_f16:
11841 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11842 return EmitAArch64CompareBuiltinExpr(
11843 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11844 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11845
11846 case NEON::BI__builtin_neon_vceqzd_u64: {
11847 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11848 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11849 Ops[0] =
11850 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11851 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11852 }
11853 case NEON::BI__builtin_neon_vceqd_f64:
11854 case NEON::BI__builtin_neon_vcled_f64:
11855 case NEON::BI__builtin_neon_vcltd_f64:
11856 case NEON::BI__builtin_neon_vcged_f64:
11857 case NEON::BI__builtin_neon_vcgtd_f64: {
11858 llvm::CmpInst::Predicate P;
11859 switch (BuiltinID) {
11860 default: llvm_unreachable("missing builtin ID in switch!");
11861 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11862 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11863 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11864 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11865 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11866 }
11867 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11868 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11869 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11870 if (P == llvm::FCmpInst::FCMP_OEQ)
11871 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11872 else
11873 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11874 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11875 }
11876 case NEON::BI__builtin_neon_vceqs_f32:
11877 case NEON::BI__builtin_neon_vcles_f32:
11878 case NEON::BI__builtin_neon_vclts_f32:
11879 case NEON::BI__builtin_neon_vcges_f32:
11880 case NEON::BI__builtin_neon_vcgts_f32: {
11881 llvm::CmpInst::Predicate P;
11882 switch (BuiltinID) {
11883 default: llvm_unreachable("missing builtin ID in switch!");
11884 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11885 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11886 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11887 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11888 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11889 }
11890 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11891 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11892 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11893 if (P == llvm::FCmpInst::FCMP_OEQ)
11894 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11895 else
11896 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11897 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11898 }
11899 case NEON::BI__builtin_neon_vceqh_f16:
11900 case NEON::BI__builtin_neon_vcleh_f16:
11901 case NEON::BI__builtin_neon_vclth_f16:
11902 case NEON::BI__builtin_neon_vcgeh_f16:
11903 case NEON::BI__builtin_neon_vcgth_f16: {
11904 llvm::CmpInst::Predicate P;
11905 switch (BuiltinID) {
11906 default: llvm_unreachable("missing builtin ID in switch!");
11907 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11908 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11909 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11910 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11911 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11912 }
11913 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11914 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11915 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11916 if (P == llvm::FCmpInst::FCMP_OEQ)
11917 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11918 else
11919 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11920 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11921 }
11922 case NEON::BI__builtin_neon_vceqd_s64:
11923 case NEON::BI__builtin_neon_vceqd_u64:
11924 case NEON::BI__builtin_neon_vcgtd_s64:
11925 case NEON::BI__builtin_neon_vcgtd_u64:
11926 case NEON::BI__builtin_neon_vcltd_s64:
11927 case NEON::BI__builtin_neon_vcltd_u64:
11928 case NEON::BI__builtin_neon_vcged_u64:
11929 case NEON::BI__builtin_neon_vcged_s64:
11930 case NEON::BI__builtin_neon_vcled_u64:
11931 case NEON::BI__builtin_neon_vcled_s64: {
11932 llvm::CmpInst::Predicate P;
11933 switch (BuiltinID) {
11934 default: llvm_unreachable("missing builtin ID in switch!");
11935 case NEON::BI__builtin_neon_vceqd_s64:
11936 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11937 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11938 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11939 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11940 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11941 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11942 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11943 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11944 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11945 }
11946 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11947 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11948 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11949 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
11950 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
11951 }
11952 case NEON::BI__builtin_neon_vtstd_s64:
11953 case NEON::BI__builtin_neon_vtstd_u64: {
11954 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11955 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11956 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11957 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
11958 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
11959 llvm::Constant::getNullValue(Int64Ty));
11960 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
11961 }
11962 case NEON::BI__builtin_neon_vset_lane_i8:
11963 case NEON::BI__builtin_neon_vset_lane_i16:
11964 case NEON::BI__builtin_neon_vset_lane_i32:
11965 case NEON::BI__builtin_neon_vset_lane_i64:
11966 case NEON::BI__builtin_neon_vset_lane_bf16:
11967 case NEON::BI__builtin_neon_vset_lane_f32:
11968 case NEON::BI__builtin_neon_vsetq_lane_i8:
11969 case NEON::BI__builtin_neon_vsetq_lane_i16:
11970 case NEON::BI__builtin_neon_vsetq_lane_i32:
11971 case NEON::BI__builtin_neon_vsetq_lane_i64:
11972 case NEON::BI__builtin_neon_vsetq_lane_bf16:
11973 case NEON::BI__builtin_neon_vsetq_lane_f32:
11974 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11975 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11976 case NEON::BI__builtin_neon_vset_lane_f64:
11977 // The vector type needs a cast for the v1f64 variant.
11978 Ops[1] =
11979 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
11980 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11981 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11982 case NEON::BI__builtin_neon_vsetq_lane_f64:
11983 // The vector type needs a cast for the v2f64 variant.
11984 Ops[1] =
11985 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
11986 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11987 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11988
11989 case NEON::BI__builtin_neon_vget_lane_i8:
11990 case NEON::BI__builtin_neon_vdupb_lane_i8:
11991 Ops[0] =
11992 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
11993 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11994 "vget_lane");
11995 case NEON::BI__builtin_neon_vgetq_lane_i8:
11996 case NEON::BI__builtin_neon_vdupb_laneq_i8:
11997 Ops[0] =
11998 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
11999 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12000 "vgetq_lane");
12001 case NEON::BI__builtin_neon_vget_lane_i16:
12002 case NEON::BI__builtin_neon_vduph_lane_i16:
12003 Ops[0] =
12004 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12005 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12006 "vget_lane");
12007 case NEON::BI__builtin_neon_vgetq_lane_i16:
12008 case NEON::BI__builtin_neon_vduph_laneq_i16:
12009 Ops[0] =
12010 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12011 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12012 "vgetq_lane");
12013 case NEON::BI__builtin_neon_vget_lane_i32:
12014 case NEON::BI__builtin_neon_vdups_lane_i32:
12015 Ops[0] =
12016 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12017 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12018 "vget_lane");
12019 case NEON::BI__builtin_neon_vdups_lane_f32:
12020 Ops[0] =
12021 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12022 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12023 "vdups_lane");
12024 case NEON::BI__builtin_neon_vgetq_lane_i32:
12025 case NEON::BI__builtin_neon_vdups_laneq_i32:
12026 Ops[0] =
12027 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12028 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12029 "vgetq_lane");
12030 case NEON::BI__builtin_neon_vget_lane_i64:
12031 case NEON::BI__builtin_neon_vdupd_lane_i64:
12032 Ops[0] =
12033 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12034 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12035 "vget_lane");
12036 case NEON::BI__builtin_neon_vdupd_lane_f64:
12037 Ops[0] =
12038 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12039 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12040 "vdupd_lane");
12041 case NEON::BI__builtin_neon_vgetq_lane_i64:
12042 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12043 Ops[0] =
12044 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12045 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12046 "vgetq_lane");
12047 case NEON::BI__builtin_neon_vget_lane_f32:
12048 Ops[0] =
12049 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12050 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12051 "vget_lane");
12052 case NEON::BI__builtin_neon_vget_lane_f64:
12053 Ops[0] =
12054 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12055 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12056 "vget_lane");
12057 case NEON::BI__builtin_neon_vgetq_lane_f32:
12058 case NEON::BI__builtin_neon_vdups_laneq_f32:
12059 Ops[0] =
12060 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12061 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12062 "vgetq_lane");
12063 case NEON::BI__builtin_neon_vgetq_lane_f64:
12064 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12065 Ops[0] =
12066 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12067 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12068 "vgetq_lane");
12069 case NEON::BI__builtin_neon_vaddh_f16:
12070 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12071 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12072 case NEON::BI__builtin_neon_vsubh_f16:
12073 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12074 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12075 case NEON::BI__builtin_neon_vmulh_f16:
12076 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12077 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12078 case NEON::BI__builtin_neon_vdivh_f16:
12079 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12080 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12081 case NEON::BI__builtin_neon_vfmah_f16:
12082 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12083 return emitCallMaybeConstrainedFPBuiltin(
12084 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12085 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12086 case NEON::BI__builtin_neon_vfmsh_f16: {
12087 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12088
12089 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12090 return emitCallMaybeConstrainedFPBuiltin(
12091 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12092 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12093 }
12094 case NEON::BI__builtin_neon_vaddd_s64:
12095 case NEON::BI__builtin_neon_vaddd_u64:
12096 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12097 case NEON::BI__builtin_neon_vsubd_s64:
12098 case NEON::BI__builtin_neon_vsubd_u64:
12099 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12100 case NEON::BI__builtin_neon_vqdmlalh_s16:
12101 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12102 SmallVector<Value *, 2> ProductOps;
12103 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12104 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12105 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12106 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12107 ProductOps, "vqdmlXl");
12108 Constant *CI = ConstantInt::get(SizeTy, 0);
12109 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12110
12111 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12112 ? Intrinsic::aarch64_neon_sqadd
12113 : Intrinsic::aarch64_neon_sqsub;
12114 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12115 }
12116 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12117 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12118 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12119 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12120 Ops, "vqshlu_n");
12121 }
12122 case NEON::BI__builtin_neon_vqshld_n_u64:
12123 case NEON::BI__builtin_neon_vqshld_n_s64: {
12124 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12125 ? Intrinsic::aarch64_neon_uqshl
12126 : Intrinsic::aarch64_neon_sqshl;
12127 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12128 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12129 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12130 }
12131 case NEON::BI__builtin_neon_vrshrd_n_u64:
12132 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12133 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12134 ? Intrinsic::aarch64_neon_urshl
12135 : Intrinsic::aarch64_neon_srshl;
12136 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12137 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12138 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12139 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12140 }
12141 case NEON::BI__builtin_neon_vrsrad_n_u64:
12142 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12143 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12144 ? Intrinsic::aarch64_neon_urshl
12145 : Intrinsic::aarch64_neon_srshl;
12146 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12147 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12148 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12149 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12150 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12151 }
12152 case NEON::BI__builtin_neon_vshld_n_s64:
12153 case NEON::BI__builtin_neon_vshld_n_u64: {
12154 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12155 return Builder.CreateShl(
12156 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12157 }
12158 case NEON::BI__builtin_neon_vshrd_n_s64: {
12159 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12160 return Builder.CreateAShr(
12161 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12162 Amt->getZExtValue())),
12163 "shrd_n");
12164 }
12165 case NEON::BI__builtin_neon_vshrd_n_u64: {
12166 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12167 uint64_t ShiftAmt = Amt->getZExtValue();
12168 // Right-shifting an unsigned value by its size yields 0.
12169 if (ShiftAmt == 64)
12170 return ConstantInt::get(Int64Ty, 0);
12171 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12172 "shrd_n");
12173 }
12174 case NEON::BI__builtin_neon_vsrad_n_s64: {
12175 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12176 Ops[1] = Builder.CreateAShr(
12177 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12178 Amt->getZExtValue())),
12179 "shrd_n");
12180 return Builder.CreateAdd(Ops[0], Ops[1]);
12181 }
12182 case NEON::BI__builtin_neon_vsrad_n_u64: {
12183 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12184 uint64_t ShiftAmt = Amt->getZExtValue();
12185 // Right-shifting an unsigned value by its size yields 0.
12186 // As Op + 0 = Op, return Ops[0] directly.
12187 if (ShiftAmt == 64)
12188 return Ops[0];
12189 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12190 "shrd_n");
12191 return Builder.CreateAdd(Ops[0], Ops[1]);
12192 }
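// Note: the scalar shift cases above fold the shift amount at compile time:
// arithmetic right shifts clamp the amount to 63 (shifting an i64 by 64 is
// undefined in LLVM IR), while logical right shifts by 64 fold to 0, so
// vsrad_n with a shift of 64 simply returns the accumulator.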
12193 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12194 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12195 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12196 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12197 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12198 "lane");
12199 SmallVector<Value *, 2> ProductOps;
12200 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12201 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12202 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12203 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12204 ProductOps, "vqdmlXl");
12205 Constant *CI = ConstantInt::get(SizeTy, 0);
12206 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12207 Ops.pop_back();
12208
12209 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12210 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12211 ? Intrinsic::aarch64_neon_sqadd
12212 : Intrinsic::aarch64_neon_sqsub;
12213 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12214 }
12215 case NEON::BI__builtin_neon_vqdmlals_s32:
12216 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12217 SmallVector<Value *, 2> ProductOps;
12218 ProductOps.push_back(Ops[1]);
12219 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12220 Ops[1] =
12221 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12222 ProductOps, "vqdmlXl");
12223
12224 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12225 ? Intrinsic::aarch64_neon_sqadd
12226 : Intrinsic::aarch64_neon_sqsub;
12227 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12228 }
12229 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12230 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12231 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12232 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12233 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12234 "lane");
12235 SmallVector<Value *, 2> ProductOps;
12236 ProductOps.push_back(Ops[1]);
12237 ProductOps.push_back(Ops[2]);
12238 Ops[1] =
12239 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12240 ProductOps, "vqdmlXl");
12241 Ops.pop_back();
12242
12243 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12244 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12245 ? Intrinsic::aarch64_neon_sqadd
12246 : Intrinsic::aarch64_neon_sqsub;
12247 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12248 }
12249 case NEON::BI__builtin_neon_vget_lane_bf16:
12250 case NEON::BI__builtin_neon_vduph_lane_bf16:
12251 case NEON::BI__builtin_neon_vduph_lane_f16: {
12252 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12253 "vget_lane");
12254 }
12255 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12256 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12257 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12258 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12259 "vgetq_lane");
12260 }
12261
12262 case clang::AArch64::BI_InterlockedAdd:
12263 case clang::AArch64::BI_InterlockedAdd64: {
12264 Address DestAddr = CheckAtomicAlignment(*this, E);
12265 Value *Val = EmitScalarExpr(E->getArg(1));
12266 AtomicRMWInst *RMWI =
12267 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12268 llvm::AtomicOrdering::SequentiallyConsistent);
12269 return Builder.CreateAdd(RMWI, Val);
12270 }
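// Note: atomicrmw add returns the value the memory held before the operation,
// so the extra CreateAdd above re-applies Val to produce the post-add value
// that _InterlockedAdd/_InterlockedAdd64 are documented to return.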
12271 }
12272
12273 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12274 llvm::Type *Ty = VTy;
12275 if (!Ty)
12276 return nullptr;
12277
12278 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12279 // defer to common code if it's been added to our special map.
12280 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12281 AArch64SIMDIntrinsicsProvenSorted);
12282
12283 if (Builtin)
12284 return EmitCommonNeonBuiltinExpr(
12285 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12286 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12287 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12288
12289 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12290 return V;
12291
12292 unsigned Int;
12293 switch (BuiltinID) {
12294 default: return nullptr;
12295 case NEON::BI__builtin_neon_vbsl_v:
12296 case NEON::BI__builtin_neon_vbslq_v: {
12297 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12298 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12299 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12300 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12301
12302 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12303 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12304 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12305 return Builder.CreateBitCast(Ops[0], Ty);
12306 }
12307 case NEON::BI__builtin_neon_vfma_lane_v:
12308 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12309 // The ARM builtins (and instructions) have the addend as the first
12310 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12311 Value *Addend = Ops[0];
12312 Value *Multiplicand = Ops[1];
12313 Value *LaneSource = Ops[2];
12314 Ops[0] = Multiplicand;
12315 Ops[1] = LaneSource;
12316 Ops[2] = Addend;
12317
12318 // Now adjust things to handle the lane access.
12319 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12320 ? llvm::FixedVectorType::get(VTy->getElementType(),
12321 VTy->getNumElements() / 2)
12322 : VTy;
12323 llvm::Constant *cst = cast<Constant>(Ops[3]);
12324 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12325 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12326 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12327
12328 Ops.pop_back();
12329 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12330 : Intrinsic::fma;
12331 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12332 }
12333 case NEON::BI__builtin_neon_vfma_laneq_v: {
12334 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12335 // v1f64 fma should be mapped to Neon scalar f64 fma
12336 if (VTy && VTy->getElementType() == DoubleTy) {
12337 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12338 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12339 llvm::FixedVectorType *VTy =
12340 llvm::FixedVectorType::get(DoubleTy, 1);
12341 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12342 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12343 Value *Result;
12344 Result = emitCallMaybeConstrainedFPBuiltin(
12345 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12346 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12347 return Builder.CreateBitCast(Result, Ty);
12348 }
12349 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12350 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12351
12352 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12353 VTy->getNumElements() * 2);
12354 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12355 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12356 cast<ConstantInt>(Ops[3]));
12357 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12358
12359 return emitCallMaybeConstrainedFPBuiltin(
12360 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12361 {Ops[2], Ops[1], Ops[0]});
12362 }
12363 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12364 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12365 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12366
12367 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12368 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12369 return emitCallMaybeConstrainedFPBuiltin(
12370 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12371 {Ops[2], Ops[1], Ops[0]});
12372 }
12373 case NEON::BI__builtin_neon_vfmah_lane_f16:
12374 case NEON::BI__builtin_neon_vfmas_lane_f32:
12375 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12376 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12377 case NEON::BI__builtin_neon_vfmad_lane_f64:
12378 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12379 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12380 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12381 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12382 return emitCallMaybeConstrainedFPBuiltin(
12383 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12384 {Ops[1], Ops[2], Ops[0]});
12385 }
12386 case NEON::BI__builtin_neon_vmull_v:
12387 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12388 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12389 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12390 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12391 case NEON::BI__builtin_neon_vmax_v:
12392 case NEON::BI__builtin_neon_vmaxq_v:
12393 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12394 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12395 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12396 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12397 case NEON::BI__builtin_neon_vmaxh_f16: {
12398 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12399 Int = Intrinsic::aarch64_neon_fmax;
12400 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12401 }
12402 case NEON::BI__builtin_neon_vmin_v:
12403 case NEON::BI__builtin_neon_vminq_v:
12404 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12405 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12406 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12407 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12408 case NEON::BI__builtin_neon_vminh_f16: {
12409 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12410 Int = Intrinsic::aarch64_neon_fmin;
12411 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12412 }
12413 case NEON::BI__builtin_neon_vabd_v:
12414 case NEON::BI__builtin_neon_vabdq_v:
12415 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12416 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12417 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12418 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12419 case NEON::BI__builtin_neon_vpadal_v:
12420 case NEON::BI__builtin_neon_vpadalq_v: {
12421 unsigned ArgElts = VTy->getNumElements();
12422 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12423 unsigned BitWidth = EltTy->getBitWidth();
12424 auto *ArgTy = llvm::FixedVectorType::get(
12425 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12426 llvm::Type* Tys[2] = { VTy, ArgTy };
12427 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12428 SmallVector<llvm::Value*, 1> TmpOps;
12429 TmpOps.push_back(Ops[1]);
12430 Function *F = CGM.getIntrinsic(Int, Tys);
12431 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12432 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12433 return Builder.CreateAdd(tmp, addend);
12434 }
12435 case NEON::BI__builtin_neon_vpmin_v:
12436 case NEON::BI__builtin_neon_vpminq_v:
12437 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12438 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12439 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12440 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12441 case NEON::BI__builtin_neon_vpmax_v:
12442 case NEON::BI__builtin_neon_vpmaxq_v:
12443 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12444 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12445 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12446 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12447 case NEON::BI__builtin_neon_vminnm_v:
12448 case NEON::BI__builtin_neon_vminnmq_v:
12449 Int = Intrinsic::aarch64_neon_fminnm;
12450 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12451 case NEON::BI__builtin_neon_vminnmh_f16:
12452 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12453 Int = Intrinsic::aarch64_neon_fminnm;
12454 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12455 case NEON::BI__builtin_neon_vmaxnm_v:
12456 case NEON::BI__builtin_neon_vmaxnmq_v:
12457 Int = Intrinsic::aarch64_neon_fmaxnm;
12458 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12459 case NEON::BI__builtin_neon_vmaxnmh_f16:
12460 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12461 Int = Intrinsic::aarch64_neon_fmaxnm;
12462 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12463 case NEON::BI__builtin_neon_vrecpss_f32: {
12464 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12465 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12466 Ops, "vrecps");
12467 }
12468 case NEON::BI__builtin_neon_vrecpsd_f64:
12469 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12470 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12471 Ops, "vrecps");
12472 case NEON::BI__builtin_neon_vrecpsh_f16:
12473 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12474 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12475 Ops, "vrecps");
12476 case NEON::BI__builtin_neon_vqshrun_n_v:
12477 Int = Intrinsic::aarch64_neon_sqshrun;
12478 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12479 case NEON::BI__builtin_neon_vqrshrun_n_v:
12480 Int = Intrinsic::aarch64_neon_sqrshrun;
12481 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12482 case NEON::BI__builtin_neon_vqshrn_n_v:
12483 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12484 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12485 case NEON::BI__builtin_neon_vrshrn_n_v:
12486 Int = Intrinsic::aarch64_neon_rshrn;
12487 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12488 case NEON::BI__builtin_neon_vqrshrn_n_v:
12489 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12490 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12491 case NEON::BI__builtin_neon_vrndah_f16: {
12492 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12493 Int = Builder.getIsFPConstrained()
12494 ? Intrinsic::experimental_constrained_round
12495 : Intrinsic::round;
12496 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12497 }
12498 case NEON::BI__builtin_neon_vrnda_v:
12499 case NEON::BI__builtin_neon_vrndaq_v: {
12500 Int = Builder.getIsFPConstrained()
12501 ? Intrinsic::experimental_constrained_round
12502 : Intrinsic::round;
12503 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12504 }
12505 case NEON::BI__builtin_neon_vrndih_f16: {
12506 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12507 Int = Builder.getIsFPConstrained()
12508 ? Intrinsic::experimental_constrained_nearbyint
12509 : Intrinsic::nearbyint;
12510 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12511 }
12512 case NEON::BI__builtin_neon_vrndmh_f16: {
12513 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12514 Int = Builder.getIsFPConstrained()
12515 ? Intrinsic::experimental_constrained_floor
12516 : Intrinsic::floor;
12517 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12518 }
12519 case NEON::BI__builtin_neon_vrndm_v:
12520 case NEON::BI__builtin_neon_vrndmq_v: {
12521 Int = Builder.getIsFPConstrained()
12522 ? Intrinsic::experimental_constrained_floor
12523 : Intrinsic::floor;
12524 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12525 }
12526 case NEON::BI__builtin_neon_vrndnh_f16: {
12527 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12528 Int = Builder.getIsFPConstrained()
12529 ? Intrinsic::experimental_constrained_roundeven
12530 : Intrinsic::roundeven;
12531 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12532 }
12533 case NEON::BI__builtin_neon_vrndn_v:
12534 case NEON::BI__builtin_neon_vrndnq_v: {
12535 Int = Builder.getIsFPConstrained()
12536 ? Intrinsic::experimental_constrained_roundeven
12537 : Intrinsic::roundeven;
12538 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12539 }
12540 case NEON::BI__builtin_neon_vrndns_f32: {
12541 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12542 Int = Builder.getIsFPConstrained()
12543 ? Intrinsic::experimental_constrained_roundeven
12544 : Intrinsic::roundeven;
12545 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12546 }
12547 case NEON::BI__builtin_neon_vrndph_f16: {
12548 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12549 Int = Builder.getIsFPConstrained()
12550 ? Intrinsic::experimental_constrained_ceil
12551 : Intrinsic::ceil;
12552 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12553 }
12554 case NEON::BI__builtin_neon_vrndp_v:
12555 case NEON::BI__builtin_neon_vrndpq_v: {
12556 Int = Builder.getIsFPConstrained()
12557 ? Intrinsic::experimental_constrained_ceil
12558 : Intrinsic::ceil;
12559 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12560 }
12561 case NEON::BI__builtin_neon_vrndxh_f16: {
12562 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12563 Int = Builder.getIsFPConstrained()
12564 ? Intrinsic::experimental_constrained_rint
12565 : Intrinsic::rint;
12566 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12567 }
12568 case NEON::BI__builtin_neon_vrndx_v:
12569 case NEON::BI__builtin_neon_vrndxq_v: {
12570 Int = Builder.getIsFPConstrained()
12571 ? Intrinsic::experimental_constrained_rint
12572 : Intrinsic::rint;
12573 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12574 }
12575 case NEON::BI__builtin_neon_vrndh_f16: {
12576 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12577 Int = Builder.getIsFPConstrained()
12578 ? Intrinsic::experimental_constrained_trunc
12579 : Intrinsic::trunc;
12580 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12581 }
12582 case NEON::BI__builtin_neon_vrnd32x_f32:
12583 case NEON::BI__builtin_neon_vrnd32xq_f32:
12584 case NEON::BI__builtin_neon_vrnd32x_f64:
12585 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12586 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12587 Int = Intrinsic::aarch64_neon_frint32x;
12588 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12589 }
12590 case NEON::BI__builtin_neon_vrnd32z_f32:
12591 case NEON::BI__builtin_neon_vrnd32zq_f32:
12592 case NEON::BI__builtin_neon_vrnd32z_f64:
12593 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12594 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12595 Int = Intrinsic::aarch64_neon_frint32z;
12596 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12597 }
12598 case NEON::BI__builtin_neon_vrnd64x_f32:
12599 case NEON::BI__builtin_neon_vrnd64xq_f32:
12600 case NEON::BI__builtin_neon_vrnd64x_f64:
12601 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12602 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12603 Int = Intrinsic::aarch64_neon_frint64x;
12604 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12605 }
12606 case NEON::BI__builtin_neon_vrnd64z_f32:
12607 case NEON::BI__builtin_neon_vrnd64zq_f32:
12608 case NEON::BI__builtin_neon_vrnd64z_f64:
12609 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12610 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12611 Int = Intrinsic::aarch64_neon_frint64z;
12612 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12613 }
12614 case NEON::BI__builtin_neon_vrnd_v:
12615 case NEON::BI__builtin_neon_vrndq_v: {
12616 Int = Builder.getIsFPConstrained()
12617 ? Intrinsic::experimental_constrained_trunc
12618 : Intrinsic::trunc;
12619 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12620 }
12621 case NEON::BI__builtin_neon_vcvt_f64_v:
12622 case NEON::BI__builtin_neon_vcvtq_f64_v:
12623 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12624 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12625 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12626 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12627 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12628 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12629 "unexpected vcvt_f64_f32 builtin");
12630 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12631 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12632
12633 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12634 }
12635 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12636 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12637 "unexpected vcvt_f32_f64 builtin");
12638 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12639 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12640
12641 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12642 }
12643 case NEON::BI__builtin_neon_vcvt_s32_v:
12644 case NEON::BI__builtin_neon_vcvt_u32_v:
12645 case NEON::BI__builtin_neon_vcvt_s64_v:
12646 case NEON::BI__builtin_neon_vcvt_u64_v:
12647 case NEON::BI__builtin_neon_vcvt_s16_f16:
12648 case NEON::BI__builtin_neon_vcvt_u16_f16:
12649 case NEON::BI__builtin_neon_vcvtq_s32_v:
12650 case NEON::BI__builtin_neon_vcvtq_u32_v:
12651 case NEON::BI__builtin_neon_vcvtq_s64_v:
12652 case NEON::BI__builtin_neon_vcvtq_u64_v:
12653 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12654 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12655 Int =
12656 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12657 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12658 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12659 }
12660 case NEON::BI__builtin_neon_vcvta_s16_f16:
12661 case NEON::BI__builtin_neon_vcvta_u16_f16:
12662 case NEON::BI__builtin_neon_vcvta_s32_v:
12663 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12664 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12665 case NEON::BI__builtin_neon_vcvta_u32_v:
12666 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12667 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12668 case NEON::BI__builtin_neon_vcvta_s64_v:
12669 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12670 case NEON::BI__builtin_neon_vcvta_u64_v:
12671 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12672 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12673 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12674 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12675 }
12676 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12677 case NEON::BI__builtin_neon_vcvtm_s32_v:
12678 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12679 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12680 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12681 case NEON::BI__builtin_neon_vcvtm_u32_v:
12682 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12683 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12684 case NEON::BI__builtin_neon_vcvtm_s64_v:
12685 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12686 case NEON::BI__builtin_neon_vcvtm_u64_v:
12687 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12688 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12689 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12690 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12691 }
12692 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12693 case NEON::BI__builtin_neon_vcvtn_s32_v:
12694 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12695 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12696 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12697 case NEON::BI__builtin_neon_vcvtn_u32_v:
12698 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12699 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12700 case NEON::BI__builtin_neon_vcvtn_s64_v:
12701 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12702 case NEON::BI__builtin_neon_vcvtn_u64_v:
12703 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12704 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12705 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12706 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12707 }
12708 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12709 case NEON::BI__builtin_neon_vcvtp_s32_v:
12710 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12711 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12712 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12713 case NEON::BI__builtin_neon_vcvtp_u32_v:
12714 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12715 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12716 case NEON::BI__builtin_neon_vcvtp_s64_v:
12717 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12718 case NEON::BI__builtin_neon_vcvtp_u64_v:
12719 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12720 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12721 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12722 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12723 }
12724 case NEON::BI__builtin_neon_vmulx_v:
12725 case NEON::BI__builtin_neon_vmulxq_v: {
12726 Int = Intrinsic::aarch64_neon_fmulx;
12727 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12728 }
12729 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12730 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12731 // vmulx_lane should be mapped to Neon scalar mulx after
12732 // extracting the scalar element
12733 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12734 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12735 Ops.pop_back();
12736 Int = Intrinsic::aarch64_neon_fmulx;
12737 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12738 }
12739 case NEON::BI__builtin_neon_vmul_lane_v:
12740 case NEON::BI__builtin_neon_vmul_laneq_v: {
12741 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12742 bool Quad = false;
12743 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12744 Quad = true;
12745 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12746 llvm::FixedVectorType *VTy =
12747 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12748 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12749 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12750 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12751 return Builder.CreateBitCast(Result, Ty);
12752 }
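  // Illustrative sketch (not part of CGBuiltin.cpp): the lowering above reduces
  // the v1f64 lane multiply to a plain scalar fmul. Roughly, in ACLE terms for
  // the _laneq form (prototypes live in arm_neon.h; shown here only as a model):
  //   float64x1_t vmul_laneq_f64(float64x1_t a, float64x2_t v, const int lane) {
  //     return (float64x1_t){ vget_lane_f64(a, 0) * vgetq_lane_f64(v, lane) };
  //   }
  // i.e. bitcast + extractelement + fmul + bitcast, as emitted above.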
12753 case NEON::BI__builtin_neon_vnegd_s64:
12754 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12755 case NEON::BI__builtin_neon_vnegh_f16:
12756 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12757 case NEON::BI__builtin_neon_vpmaxnm_v:
12758 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12759 Int = Intrinsic::aarch64_neon_fmaxnmp;
12760 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12761 }
12762 case NEON::BI__builtin_neon_vpminnm_v:
12763 case NEON::BI__builtin_neon_vpminnmq_v: {
12764 Int = Intrinsic::aarch64_neon_fminnmp;
12765 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12766 }
12767 case NEON::BI__builtin_neon_vsqrth_f16: {
12768 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12769 Int = Builder.getIsFPConstrained()
12770 ? Intrinsic::experimental_constrained_sqrt
12771 : Intrinsic::sqrt;
12772 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12773 }
12774 case NEON::BI__builtin_neon_vsqrt_v:
12775 case NEON::BI__builtin_neon_vsqrtq_v: {
12776 Int = Builder.getIsFPConstrained()
12777 ? Intrinsic::experimental_constrained_sqrt
12778 : Intrinsic::sqrt;
12779 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12780 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12781 }
12782 case NEON::BI__builtin_neon_vrbit_v:
12783 case NEON::BI__builtin_neon_vrbitq_v: {
12784 Int = Intrinsic::bitreverse;
12785 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12786 }
12787 case NEON::BI__builtin_neon_vaddv_u8:
12788 // FIXME: These are handled by the AArch64 scalar code.
12789 usgn = true;
12790 [[fallthrough]];
12791 case NEON::BI__builtin_neon_vaddv_s8: {
12792 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12793 Ty = Int32Ty;
12794 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12795 llvm::Type *Tys[2] = { Ty, VTy };
12796 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12797 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12798 return Builder.CreateTrunc(Ops[0], Int8Ty);
12799 }
12800 case NEON::BI__builtin_neon_vaddv_u16:
12801 usgn = true;
12802 [[fallthrough]];
12803 case NEON::BI__builtin_neon_vaddv_s16: {
12804 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12805 Ty = Int32Ty;
12806 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12807 llvm::Type *Tys[2] = { Ty, VTy };
12808 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12809 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12810 return Builder.CreateTrunc(Ops[0], Int16Ty);
12811 }
12812 case NEON::BI__builtin_neon_vaddvq_u8:
12813 usgn = true;
12814 [[fallthrough]];
12815 case NEON::BI__builtin_neon_vaddvq_s8: {
12816 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12817 Ty = Int32Ty;
12818 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12819 llvm::Type *Tys[2] = { Ty, VTy };
12820 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12821 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12822 return Builder.CreateTrunc(Ops[0], Int8Ty);
12823 }
12824 case NEON::BI__builtin_neon_vaddvq_u16:
12825 usgn = true;
12826 [[fallthrough]];
12827 case NEON::BI__builtin_neon_vaddvq_s16: {
12828 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12829 Ty = Int32Ty;
12830 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12831 llvm::Type *Tys[2] = { Ty, VTy };
12832 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12833 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12834 return Builder.CreateTrunc(Ops[0], Int16Ty);
12835 }
12836 case NEON::BI__builtin_neon_vmaxv_u8: {
12837 Int = Intrinsic::aarch64_neon_umaxv;
12838 Ty = Int32Ty;
12839 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12840 llvm::Type *Tys[2] = { Ty, VTy };
12841 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12842 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12843 return Builder.CreateTrunc(Ops[0], Int8Ty);
12844 }
12845 case NEON::BI__builtin_neon_vmaxv_u16: {
12846 Int = Intrinsic::aarch64_neon_umaxv;
12847 Ty = Int32Ty;
12848 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12849 llvm::Type *Tys[2] = { Ty, VTy };
12850 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12851 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12852 return Builder.CreateTrunc(Ops[0], Int16Ty);
12853 }
12854 case NEON::BI__builtin_neon_vmaxvq_u8: {
12855 Int = Intrinsic::aarch64_neon_umaxv;
12856 Ty = Int32Ty;
12857 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12858 llvm::Type *Tys[2] = { Ty, VTy };
12859 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12860 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12861 return Builder.CreateTrunc(Ops[0], Int8Ty);
12862 }
12863 case NEON::BI__builtin_neon_vmaxvq_u16: {
12864 Int = Intrinsic::aarch64_neon_umaxv;
12865 Ty = Int32Ty;
12866 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12867 llvm::Type *Tys[2] = { Ty, VTy };
12868 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12869 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12870 return Builder.CreateTrunc(Ops[0], Int16Ty);
12871 }
12872 case NEON::BI__builtin_neon_vmaxv_s8: {
12873 Int = Intrinsic::aarch64_neon_smaxv;
12874 Ty = Int32Ty;
12875 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12876 llvm::Type *Tys[2] = { Ty, VTy };
12877 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12878 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12879 return Builder.CreateTrunc(Ops[0], Int8Ty);
12880 }
12881 case NEON::BI__builtin_neon_vmaxv_s16: {
12882 Int = Intrinsic::aarch64_neon_smaxv;
12883 Ty = Int32Ty;
12884 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12885 llvm::Type *Tys[2] = { Ty, VTy };
12886 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12887 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12888 return Builder.CreateTrunc(Ops[0], Int16Ty);
12889 }
12890 case NEON::BI__builtin_neon_vmaxvq_s8: {
12891 Int = Intrinsic::aarch64_neon_smaxv;
12892 Ty = Int32Ty;
12893 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12894 llvm::Type *Tys[2] = { Ty, VTy };
12895 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12896 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12897 return Builder.CreateTrunc(Ops[0], Int8Ty);
12898 }
12899 case NEON::BI__builtin_neon_vmaxvq_s16: {
12900 Int = Intrinsic::aarch64_neon_smaxv;
12901 Ty = Int32Ty;
12902 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12903 llvm::Type *Tys[2] = { Ty, VTy };
12904 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12905 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12906 return Builder.CreateTrunc(Ops[0], Int16Ty);
12907 }
12908 case NEON::BI__builtin_neon_vmaxv_f16: {
12909 Int = Intrinsic::aarch64_neon_fmaxv;
12910 Ty = HalfTy;
12911 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12912 llvm::Type *Tys[2] = { Ty, VTy };
12913 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12914 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12915 return Builder.CreateTrunc(Ops[0], HalfTy);
12916 }
12917 case NEON::BI__builtin_neon_vmaxvq_f16: {
12918 Int = Intrinsic::aarch64_neon_fmaxv;
12919 Ty = HalfTy;
12920 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12921 llvm::Type *Tys[2] = { Ty, VTy };
12922 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12923 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12924 return Builder.CreateTrunc(Ops[0], HalfTy);
12925 }
12926 case NEON::BI__builtin_neon_vminv_u8: {
12927 Int = Intrinsic::aarch64_neon_uminv;
12928 Ty = Int32Ty;
12929 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12930 llvm::Type *Tys[2] = { Ty, VTy };
12931 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12932 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12933 return Builder.CreateTrunc(Ops[0], Int8Ty);
12934 }
12935 case NEON::BI__builtin_neon_vminv_u16: {
12936 Int = Intrinsic::aarch64_neon_uminv;
12937 Ty = Int32Ty;
12938 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12939 llvm::Type *Tys[2] = { Ty, VTy };
12940 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12941 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12942 return Builder.CreateTrunc(Ops[0], Int16Ty);
12943 }
12944 case NEON::BI__builtin_neon_vminvq_u8: {
12945 Int = Intrinsic::aarch64_neon_uminv;
12946 Ty = Int32Ty;
12947 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12948 llvm::Type *Tys[2] = { Ty, VTy };
12949 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12950 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12951 return Builder.CreateTrunc(Ops[0], Int8Ty);
12952 }
12953 case NEON::BI__builtin_neon_vminvq_u16: {
12954 Int = Intrinsic::aarch64_neon_uminv;
12955 Ty = Int32Ty;
12956 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12957 llvm::Type *Tys[2] = { Ty, VTy };
12958 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12959 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12960 return Builder.CreateTrunc(Ops[0], Int16Ty);
12961 }
12962 case NEON::BI__builtin_neon_vminv_s8: {
12963 Int = Intrinsic::aarch64_neon_sminv;
12964 Ty = Int32Ty;
12965 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12966 llvm::Type *Tys[2] = { Ty, VTy };
12967 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12968 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12969 return Builder.CreateTrunc(Ops[0], Int8Ty);
12970 }
12971 case NEON::BI__builtin_neon_vminv_s16: {
12972 Int = Intrinsic::aarch64_neon_sminv;
12973 Ty = Int32Ty;
12974 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12975 llvm::Type *Tys[2] = { Ty, VTy };
12976 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12977 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12978 return Builder.CreateTrunc(Ops[0], Int16Ty);
12979 }
12980 case NEON::BI__builtin_neon_vminvq_s8: {
12981 Int = Intrinsic::aarch64_neon_sminv;
12982 Ty = Int32Ty;
12983 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12984 llvm::Type *Tys[2] = { Ty, VTy };
12985 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12986 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12987 return Builder.CreateTrunc(Ops[0], Int8Ty);
12988 }
12989 case NEON::BI__builtin_neon_vminvq_s16: {
12990 Int = Intrinsic::aarch64_neon_sminv;
12991 Ty = Int32Ty;
12992 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12993 llvm::Type *Tys[2] = { Ty, VTy };
12994 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12995 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12996 return Builder.CreateTrunc(Ops[0], Int16Ty);
12997 }
12998 case NEON::BI__builtin_neon_vminv_f16: {
12999 Int = Intrinsic::aarch64_neon_fminv;
13000 Ty = HalfTy;
13001 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13002 llvm::Type *Tys[2] = { Ty, VTy };
13003 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13004 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13005 return Builder.CreateTrunc(Ops[0], HalfTy);
13006 }
13007 case NEON::BI__builtin_neon_vminvq_f16: {
13008 Int = Intrinsic::aarch64_neon_fminv;
13009 Ty = HalfTy;
13010 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13011 llvm::Type *Tys[2] = { Ty, VTy };
13012 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13013 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13014 return Builder.CreateTrunc(Ops[0], HalfTy);
13015 }
13016 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13017 Int = Intrinsic::aarch64_neon_fmaxnmv;
13018 Ty = HalfTy;
13019 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13020 llvm::Type *Tys[2] = { Ty, VTy };
13021 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13022 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13023 return Builder.CreateTrunc(Ops[0], HalfTy);
13024 }
13025 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13026 Int = Intrinsic::aarch64_neon_fmaxnmv;
13027 Ty = HalfTy;
13028 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13029 llvm::Type *Tys[2] = { Ty, VTy };
13030 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13031 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13032 return Builder.CreateTrunc(Ops[0], HalfTy);
13033 }
13034 case NEON::BI__builtin_neon_vminnmv_f16: {
13035 Int = Intrinsic::aarch64_neon_fminnmv;
13036 Ty = HalfTy;
13037 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13038 llvm::Type *Tys[2] = { Ty, VTy };
13039 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13040 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13041 return Builder.CreateTrunc(Ops[0], HalfTy);
13042 }
13043 case NEON::BI__builtin_neon_vminnmvq_f16: {
13044 Int = Intrinsic::aarch64_neon_fminnmv;
13045 Ty = HalfTy;
13046 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13047 llvm::Type *Tys[2] = { Ty, VTy };
13048 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13049 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13050 return Builder.CreateTrunc(Ops[0], HalfTy);
13051 }
13052 case NEON::BI__builtin_neon_vmul_n_f64: {
13053 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13054 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13055 return Builder.CreateFMul(Ops[0], RHS);
13056 }
13057 case NEON::BI__builtin_neon_vaddlv_u8: {
13058 Int = Intrinsic::aarch64_neon_uaddlv;
13059 Ty = Int32Ty;
13060 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13061 llvm::Type *Tys[2] = { Ty, VTy };
13062 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13063 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13064 return Builder.CreateTrunc(Ops[0], Int16Ty);
13065 }
13066 case NEON::BI__builtin_neon_vaddlv_u16: {
13067 Int = Intrinsic::aarch64_neon_uaddlv;
13068 Ty = Int32Ty;
13069 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13070 llvm::Type *Tys[2] = { Ty, VTy };
13071 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13072 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13073 }
13074 case NEON::BI__builtin_neon_vaddlvq_u8: {
13075 Int = Intrinsic::aarch64_neon_uaddlv;
13076 Ty = Int32Ty;
13077 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13078 llvm::Type *Tys[2] = { Ty, VTy };
13079 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13080 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13081 return Builder.CreateTrunc(Ops[0], Int16Ty);
13082 }
13083 case NEON::BI__builtin_neon_vaddlvq_u16: {
13084 Int = Intrinsic::aarch64_neon_uaddlv;
13085 Ty = Int32Ty;
13086 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13087 llvm::Type *Tys[2] = { Ty, VTy };
13088 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13089 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13090 }
13091 case NEON::BI__builtin_neon_vaddlv_s8: {
13092 Int = Intrinsic::aarch64_neon_saddlv;
13093 Ty = Int32Ty;
13094 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13095 llvm::Type *Tys[2] = { Ty, VTy };
13096 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13097 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13098 return Builder.CreateTrunc(Ops[0], Int16Ty);
13099 }
13100 case NEON::BI__builtin_neon_vaddlv_s16: {
13101 Int = Intrinsic::aarch64_neon_saddlv;
13102 Ty = Int32Ty;
13103 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13104 llvm::Type *Tys[2] = { Ty, VTy };
13105 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13106 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13107 }
13108 case NEON::BI__builtin_neon_vaddlvq_s8: {
13109 Int = Intrinsic::aarch64_neon_saddlv;
13110 Ty = Int32Ty;
13111 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13112 llvm::Type *Tys[2] = { Ty, VTy };
13113 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13114 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13115 return Builder.CreateTrunc(Ops[0], Int16Ty);
13116 }
13117 case NEON::BI__builtin_neon_vaddlvq_s16: {
13118 Int = Intrinsic::aarch64_neon_saddlv;
13119 Ty = Int32Ty;
13120 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13121 llvm::Type *Tys[2] = { Ty, VTy };
13122 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13123 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13124 }
13125 case NEON::BI__builtin_neon_vsri_n_v:
13126 case NEON::BI__builtin_neon_vsriq_n_v: {
13127 Int = Intrinsic::aarch64_neon_vsri;
13128 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13129 return EmitNeonCall(Intrin, Ops, "vsri_n");
13130 }
13131 case NEON::BI__builtin_neon_vsli_n_v:
13132 case NEON::BI__builtin_neon_vsliq_n_v: {
13133 Int = Intrinsic::aarch64_neon_vsli;
13134 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13135 return EmitNeonCall(Intrin, Ops, "vsli_n");
13136 }
13137 case NEON::BI__builtin_neon_vsra_n_v:
13138 case NEON::BI__builtin_neon_vsraq_n_v:
13139 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13140 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13141 return Builder.CreateAdd(Ops[0], Ops[1]);
13142 case NEON::BI__builtin_neon_vrsra_n_v:
13143 case NEON::BI__builtin_neon_vrsraq_n_v: {
13144 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13145 SmallVector<llvm::Value*, 2> TmpOps;
13146 TmpOps.push_back(Ops[1]);
13147 TmpOps.push_back(Ops[2]);
13148 Function* F = CGM.getIntrinsic(Int, Ty);
13149 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13150 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13151 return Builder.CreateAdd(Ops[0], tmp);
13152 }
13153 case NEON::BI__builtin_neon_vld1_v:
13154 case NEON::BI__builtin_neon_vld1q_v: {
13155 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13156 }
13157 case NEON::BI__builtin_neon_vst1_v:
13158 case NEON::BI__builtin_neon_vst1q_v:
13159 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13160 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13161 case NEON::BI__builtin_neon_vld1_lane_v:
13162 case NEON::BI__builtin_neon_vld1q_lane_v: {
13163 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13164 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13165 PtrOp0.getAlignment());
13166 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13167 }
13168 case NEON::BI__builtin_neon_vldap1_lane_s64:
13169 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13170 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13171 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13172 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13173 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13174 Ops[0] = LI;
13175 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13176 }
13177 case NEON::BI__builtin_neon_vld1_dup_v:
13178 case NEON::BI__builtin_neon_vld1q_dup_v: {
13179 Value *V = PoisonValue::get(Ty);
13180 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13181 PtrOp0.getAlignment());
13182 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13183 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13184 return EmitNeonSplat(Ops[0], CI);
13185 }
13186 case NEON::BI__builtin_neon_vst1_lane_v:
13187 case NEON::BI__builtin_neon_vst1q_lane_v:
13188 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13189 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13190 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13191 case NEON::BI__builtin_neon_vstl1_lane_s64:
13192 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13193 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13194 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13195 llvm::StoreInst *SI =
13196 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13197 SI->setAtomic(llvm::AtomicOrdering::Release);
13198 return SI;
13199 }
13200 case NEON::BI__builtin_neon_vld2_v:
13201 case NEON::BI__builtin_neon_vld2q_v: {
13202 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13203 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13204 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13205 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13206 }
13207 case NEON::BI__builtin_neon_vld3_v:
13208 case NEON::BI__builtin_neon_vld3q_v: {
13209 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13210 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13211 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13212 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13213 }
13214 case NEON::BI__builtin_neon_vld4_v:
13215 case NEON::BI__builtin_neon_vld4q_v: {
13216 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13217 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13218 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13219 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13220 }
13221 case NEON::BI__builtin_neon_vld2_dup_v:
13222 case NEON::BI__builtin_neon_vld2q_dup_v: {
13223 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13224 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13225 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13226 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13227 }
13228 case NEON::BI__builtin_neon_vld3_dup_v:
13229 case NEON::BI__builtin_neon_vld3q_dup_v: {
13230 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13231 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13232 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13233 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13234 }
13235 case NEON::BI__builtin_neon_vld4_dup_v:
13236 case NEON::BI__builtin_neon_vld4q_dup_v: {
13237 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13238 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13239 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13240 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13241 }
13242 case NEON::BI__builtin_neon_vld2_lane_v:
13243 case NEON::BI__builtin_neon_vld2q_lane_v: {
13244 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13245 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13246 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13247 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13248 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13249 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13250 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13251 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13252 }
13253 case NEON::BI__builtin_neon_vld3_lane_v:
13254 case NEON::BI__builtin_neon_vld3q_lane_v: {
13255 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13256 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13257 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13258 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13259 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13260 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13261 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13262 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13263 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13264 }
13265 case NEON::BI__builtin_neon_vld4_lane_v:
13266 case NEON::BI__builtin_neon_vld4q_lane_v: {
13267 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13268 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13269 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13270 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13271 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13272 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13273 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13274 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13275 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13276 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13277 }
13278 case NEON::BI__builtin_neon_vst2_v:
13279 case NEON::BI__builtin_neon_vst2q_v: {
13280 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13281 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13282 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13283 Ops, "");
13284 }
13285 case NEON::BI__builtin_neon_vst2_lane_v:
13286 case NEON::BI__builtin_neon_vst2q_lane_v: {
13287 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13288 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13289 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13290 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13291 Ops, "");
13292 }
13293 case NEON::BI__builtin_neon_vst3_v:
13294 case NEON::BI__builtin_neon_vst3q_v: {
13295 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13296 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13297 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13298 Ops, "");
13299 }
13300 case NEON::BI__builtin_neon_vst3_lane_v:
13301 case NEON::BI__builtin_neon_vst3q_lane_v: {
13302 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13303 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13304 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13305 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13306 Ops, "");
13307 }
13308 case NEON::BI__builtin_neon_vst4_v:
13309 case NEON::BI__builtin_neon_vst4q_v: {
13310 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13311 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13312 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13313 Ops, "");
13314 }
13315 case NEON::BI__builtin_neon_vst4_lane_v:
13316 case NEON::BI__builtin_neon_vst4q_lane_v: {
13317 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13318 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13319 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13320 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13321 Ops, "");
13322 }
13323 case NEON::BI__builtin_neon_vtrn_v:
13324 case NEON::BI__builtin_neon_vtrnq_v: {
13325 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13326 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13327 Value *SV = nullptr;
13328
13329 for (unsigned vi = 0; vi != 2; ++vi) {
13330 SmallVector<int, 16> Indices;
13331 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13332 Indices.push_back(i+vi);
13333 Indices.push_back(i+e+vi);
13334 }
13335 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13336 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13337 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13338 }
13339 return SV;
13340 }
13341 case NEON::BI__builtin_neon_vuzp_v:
13342 case NEON::BI__builtin_neon_vuzpq_v: {
13343 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13344 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13345 Value *SV = nullptr;
13346
13347 for (unsigned vi = 0; vi != 2; ++vi) {
13348 SmallVector<int, 16> Indices;
13349 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13350 Indices.push_back(2*i+vi);
13351
13352 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13353 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13354 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13355 }
13356 return SV;
13357 }
13358 case NEON::BI__builtin_neon_vzip_v:
13359 case NEON::BI__builtin_neon_vzipq_v: {
13360 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13361 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13362 Value *SV = nullptr;
13363
13364 for (unsigned vi = 0; vi != 2; ++vi) {
13365 SmallVector<int, 16> Indices;
13366 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13367 Indices.push_back((i + vi*e) >> 1);
13368 Indices.push_back(((i + vi*e) >> 1)+e);
13369 }
13370 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13371 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13372 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13373 }
13374 return SV;
13375 }
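  // Worked example (not from the source) of the shuffle masks built by the
  // vtrn/vuzp/vzip cases above for a 4-element vector, i.e. e == 4:
  //   vtrn: vi == 0 -> {0,4,2,6}   vi == 1 -> {1,5,3,7}
  //   vuzp: vi == 0 -> {0,2,4,6}   vi == 1 -> {1,3,5,7}
  //   vzip: vi == 0 -> {0,4,1,5}   vi == 1 -> {2,6,3,7}
  // Each shuffle result is stored to Ops[0] + vi, giving the two halves of the
  // NEON transpose/unzip/zip result pair.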
13376 case NEON::BI__builtin_neon_vqtbl1q_v: {
13377 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13378 Ops, "vtbl1");
13379 }
13380 case NEON::BI__builtin_neon_vqtbl2q_v: {
13381 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13382 Ops, "vtbl2");
13383 }
13384 case NEON::BI__builtin_neon_vqtbl3q_v: {
13385 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13386 Ops, "vtbl3");
13387 }
13388 case NEON::BI__builtin_neon_vqtbl4q_v: {
13389 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13390 Ops, "vtbl4");
13391 }
13392 case NEON::BI__builtin_neon_vqtbx1q_v: {
13393 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13394 Ops, "vtbx1");
13395 }
13396 case NEON::BI__builtin_neon_vqtbx2q_v: {
13397 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13398 Ops, "vtbx2");
13399 }
13400 case NEON::BI__builtin_neon_vqtbx3q_v: {
13401 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13402 Ops, "vtbx3");
13403 }
13404 case NEON::BI__builtin_neon_vqtbx4q_v: {
13405 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13406 Ops, "vtbx4");
13407 }
13408 case NEON::BI__builtin_neon_vsqadd_v:
13409 case NEON::BI__builtin_neon_vsqaddq_v: {
13410 Int = Intrinsic::aarch64_neon_usqadd;
13411 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13412 }
13413 case NEON::BI__builtin_neon_vuqadd_v:
13414 case NEON::BI__builtin_neon_vuqaddq_v: {
13415 Int = Intrinsic::aarch64_neon_suqadd;
13416 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13417 }
13418 }
13419}
13420
13421Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13422 const CallExpr *E) {
13423 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13424 BuiltinID == BPF::BI__builtin_btf_type_id ||
13425 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13426 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13427 "unexpected BPF builtin");
13428
13429 // A sequence number, injected into the emitted builtin calls, to
13430 // prevent CSE when the only difference between otherwise identical
13431 // calls may be just the debuginfo metadata.
13432 static uint32_t BuiltinSeqNum;
13433
13434 switch (BuiltinID) {
13435 default:
13436 llvm_unreachable("Unexpected BPF builtin");
13437 case BPF::BI__builtin_preserve_field_info: {
13438 const Expr *Arg = E->getArg(0);
13439 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13440
13441 if (!getDebugInfo()) {
13442 CGM.Error(E->getExprLoc(),
13443 "using __builtin_preserve_field_info() without -g");
13444 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13445 : EmitLValue(Arg).emitRawPointer(*this);
13446 }
13447
13448 // Enable underlying preserve_*_access_index() generation.
13449 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13450 IsInPreservedAIRegion = true;
13451 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13452 : EmitLValue(Arg).emitRawPointer(*this);
13453 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13454
13455 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13456 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13457
13458 // Build the IR for the preserve_field_info intrinsic.
13459 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13460 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13461 {FieldAddr->getType()});
13462 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13463 }
13464 case BPF::BI__builtin_btf_type_id:
13465 case BPF::BI__builtin_preserve_type_info: {
13466 if (!getDebugInfo()) {
13467 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13468 return nullptr;
13469 }
13470
13471 const Expr *Arg0 = E->getArg(0);
13472 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13473 Arg0->getType(), Arg0->getExprLoc());
13474
13475 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13476 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13477 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13478
13479 llvm::Function *FnDecl;
13480 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13481 FnDecl = llvm::Intrinsic::getDeclaration(
13482 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13483 else
13484 FnDecl = llvm::Intrinsic::getDeclaration(
13485 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13486 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13487 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13488 return Fn;
13489 }
13490 case BPF::BI__builtin_preserve_enum_value: {
13491 if (!getDebugInfo()) {
13492 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13493 return nullptr;
13494 }
13495
13496 const Expr *Arg0 = E->getArg(0);
13497 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13498 Arg0->getType(), Arg0->getExprLoc());
13499
13500 // Find enumerator
13501 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13502 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13503 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13504 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13505
13506 auto InitVal = Enumerator->getInitVal();
13507 std::string InitValStr;
13508 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13509 InitValStr = std::to_string(InitVal.getSExtValue());
13510 else
13511 InitValStr = std::to_string(InitVal.getZExtValue());
13512 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13513 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13514
13515 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13516 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13517 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13518
13519 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13520 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13521 CallInst *Fn =
13522 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13523 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13524 return Fn;
13525 }
13526 }
13527}
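// Illustrative usage sketch (not part of CGBuiltin.cpp): how the BPF CO-RE
// builtins handled above typically appear in user code compiled with -g.
// The struct and field names below are hypothetical, and the second argument
// of each builtin is the constant kind/flag operand checked above.
//
//   struct sample { int counter; };                  // hypothetical type, p: struct sample *
//   unsigned sz = __builtin_preserve_field_info(((struct sample *)p)->counter, 1);
//   unsigned id = __builtin_btf_type_id(*(struct sample *)0, 0);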
13528
13529llvm::Value *CodeGenFunction::
13530BuildVector(ArrayRef<llvm::Value*> Ops) {
13531 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13532 "Not a power-of-two sized vector!");
13533 bool AllConstants = true;
13534 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13535 AllConstants &= isa<Constant>(Ops[i]);
13536
13537 // If this is a constant vector, create a ConstantVector.
13538 if (AllConstants) {
13539 SmallVector<llvm::Constant*, 16> CstOps;
13540 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13541 CstOps.push_back(cast<Constant>(Ops[i]));
13542 return llvm::ConstantVector::get(CstOps);
13543 }
13544
13545 // Otherwise, insertelement the values to build the vector.
13546 Value *Result = llvm::PoisonValue::get(
13547 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13548
13549 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13550 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13551
13552 return Result;
13553}
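// Illustrative note (not from the source): BuildVector has two paths. With
// all-constant operands the result folds directly to a ConstantVector, e.g.
//   {i32 1, i32 2, i32 3, i32 4}  ->  <4 x i32> <i32 1, i32 2, i32 3, i32 4>
// Otherwise a chain of insertelement instructions is emitted, starting from a
// poison vector, one insert per operand.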
13554
13555// Convert the mask from an integer type to a vector of i1.
13556static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13557 unsigned NumElts) {
13558
13559 auto *MaskTy = llvm::FixedVectorType::get(
13560 CGF.Builder.getInt1Ty(),
13561 cast<IntegerType>(Mask->getType())->getBitWidth());
13562 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13563
13564 // If we have less than 8 elements, then the starting mask was an i8 and
13565 // we need to extract down to the right number of elements.
13566 if (NumElts < 8) {
13567 int Indices[4];
13568 for (unsigned i = 0; i != NumElts; ++i)
13569 Indices[i] = i;
13570 MaskVec = CGF.Builder.CreateShuffleVector(
13571 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13572 }
13573 return MaskVec;
13574}
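// Worked example (not from the source) of what getMaskVecValue produces for a
// 4-element mask: bit i of the incoming integer selects lane i, and the
// shuffle labelled "extract" drops the unused high bits.
//   uint8_t Mask = 0x0B;     // 0b0000'1011
//   // bitcast i8 -> <8 x i1> <1,1,0,1,0,0,0,0>, then keep lanes 0..3
//   // result: <4 x i1> <1, 1, 0, 1>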
13575
13576static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13577 Align Alignment) {
13578 Value *Ptr = Ops[0];
13579
13580 Value *MaskVec = getMaskVecValue(
13581 CGF, Ops[2],
13582 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13583
13584 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13585}
13586
13587static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13588 Align Alignment) {
13589 llvm::Type *Ty = Ops[1]->getType();
13590 Value *Ptr = Ops[0];
13591
13592 Value *MaskVec = getMaskVecValue(
13593 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13594
13595 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13596}
13597
13598static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13599 ArrayRef<Value *> Ops) {
13600 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13601 Value *Ptr = Ops[0];
13602
13603 Value *MaskVec = getMaskVecValue(
13604 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13605
13606 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13607 ResultTy);
13608 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13609}
13610
13611static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13612 ArrayRef<Value *> Ops,
13613 bool IsCompress) {
13614 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13615
13616 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13617
13618 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13619 : Intrinsic::x86_avx512_mask_expand;
13620 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13621 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13622}
13623
13624static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13625 ArrayRef<Value *> Ops) {
13626 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13627 Value *Ptr = Ops[0];
13628
13629 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13630
13631 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13632 ResultTy);
13633 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13634}
13635
13636static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13637 ArrayRef<Value *> Ops,
13638 bool InvertLHS = false) {
13639 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13640 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13641 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13642
13643 if (InvertLHS)
13644 LHS = CGF.Builder.CreateNot(LHS);
13645
13646 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13647 Ops[0]->getType());
13648}
13649
13650static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13651 Value *Amt, bool IsRight) {
13652 llvm::Type *Ty = Op0->getType();
13653
13654 // The amount may be a scalar immediate, in which case create a splat vector.
13655 // Funnel shift amounts are treated as modulo, and the types are all power-of-2
13656 // wide, so we only care about the lowest log2 bits anyway.
13657 if (Amt->getType() != Ty) {
13658 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13659 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13660 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13661 }
13662
13663 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13664 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13665 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13666}
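// Scalar model (not from the source) of the funnel shift emitted above,
// assuming 32-bit lanes; the helper name fshl32 is illustrative only.
// llvm.fshl(a, b, amt) shifts the concatenation a:b left by amt modulo the
// bit width and returns the high word; llvm.fshr is the right-shift analogue.
//   uint32_t fshl32(uint32_t A, uint32_t B, uint32_t Amt) {
//     Amt &= 31;                                    // amount is modulo 32
//     return Amt ? (A << Amt) | (B >> (32 - Amt)) : A;
//   }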
13667
13668static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13669 bool IsSigned) {
13670 Value *Op0 = Ops[0];
13671 Value *Op1 = Ops[1];
13672 llvm::Type *Ty = Op0->getType();
13673 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13674
13675 CmpInst::Predicate Pred;
13676 switch (Imm) {
13677 case 0x0:
13678 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13679 break;
13680 case 0x1:
13681 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13682 break;
13683 case 0x2:
13684 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13685 break;
13686 case 0x3:
13687 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13688 break;
13689 case 0x4:
13690 Pred = ICmpInst::ICMP_EQ;
13691 break;
13692 case 0x5:
13693 Pred = ICmpInst::ICMP_NE;
13694 break;
13695 case 0x6:
13696 return llvm::Constant::getNullValue(Ty); // FALSE
13697 case 0x7:
13698 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13699 default:
13700 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13701 }
13702
13703 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13704 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13705 return Res;
13706}
13707
13708static Value *EmitX86Select(CodeGenFunction &CGF,
13709 Value *Mask, Value *Op0, Value *Op1) {
13710
13711 // If the mask is all ones, just return the first argument.
13712 if (const auto *C = dyn_cast<Constant>(Mask))
13713 if (C->isAllOnesValue())
13714 return Op0;
13715
13716 Mask = getMaskVecValue(
13717 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13718
13719 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13720}
13721
13722static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13723 Value *Mask, Value *Op0, Value *Op1) {
13724 // If the mask is all ones, just return the first argument.
13725 if (const auto *C = dyn_cast<Constant>(Mask))
13726 if (C->isAllOnesValue())
13727 return Op0;
13728
13729 auto *MaskTy = llvm::FixedVectorType::get(
13730 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13731 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13732 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13733 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13734}
13735
13736static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13737 unsigned NumElts, Value *MaskIn) {
13738 if (MaskIn) {
13739 const auto *C = dyn_cast<Constant>(MaskIn);
13740 if (!C || !C->isAllOnesValue())
13741 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13742 }
13743
13744 if (NumElts < 8) {
13745 int Indices[8];
13746 for (unsigned i = 0; i != NumElts; ++i)
13747 Indices[i] = i;
13748 for (unsigned i = NumElts; i != 8; ++i)
13749 Indices[i] = i % NumElts + NumElts;
13750 Cmp = CGF.Builder.CreateShuffleVector(
13751 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13752 }
13753
13754 return CGF.Builder.CreateBitCast(Cmp,
13755 IntegerType::get(CGF.getLLVMContext(),
13756 std::max(NumElts, 8U)));
13757}
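// Worked example (not from the source): for a 4-element compare the i1
// results are widened to 8 lanes before the bitcast, so the returned mask is
// always at least i8. With Cmp = <4 x i1> <1,0,1,1> the shuffle above appends
// four zero lanes, giving <8 x i1> <1,0,1,1,0,0,0,0>, which bitcasts to
// i8 0x0D on x86.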
13758
13759static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13760 bool Signed, ArrayRef<Value *> Ops) {
13761 assert((Ops.size() == 2 || Ops.size() == 4) &&
13762 "Unexpected number of arguments");
13763 unsigned NumElts =
13764 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13765 Value *Cmp;
13766
13767 if (CC == 3) {
13768 Cmp = Constant::getNullValue(
13769 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13770 } else if (CC == 7) {
13771 Cmp = Constant::getAllOnesValue(
13772 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13773 } else {
13774 ICmpInst::Predicate Pred;
13775 switch (CC) {
13776 default: llvm_unreachable("Unknown condition code");
13777 case 0: Pred = ICmpInst::ICMP_EQ; break;
13778 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13779 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13780 case 4: Pred = ICmpInst::ICMP_NE; break;
13781 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13782 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13783 }
13784 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13785 }
13786
13787 Value *MaskIn = nullptr;
13788 if (Ops.size() == 4)
13789 MaskIn = Ops[3];
13790
13791 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13792}
13793
13794static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13795 Value *Zero = Constant::getNullValue(In->getType());
13796 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13797}
13798
13799static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13800 ArrayRef<Value *> Ops, bool IsSigned) {
13801 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13802 llvm::Type *Ty = Ops[1]->getType();
13803
13804 Value *Res;
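  // A rounding mode other than _MM_FROUND_CUR_DIRECTION (4) requires the
  // AVX-512 rounding intrinsics; the default mode lowers to plain
  // sitofp/uitofp.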
13805 if (Rnd != 4) {
13806 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13807 : Intrinsic::x86_avx512_uitofp_round;
13808 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13809 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13810 } else {
13811 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13812 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13813 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13814 }
13815
13816 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13817}
13818
13819// Lowers X86 FMA intrinsics to IR.
13820static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13821 ArrayRef<Value *> Ops, unsigned BuiltinID,
13822 bool IsAddSub) {
13823
13824 bool Subtract = false;
13825 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13826 switch (BuiltinID) {
13827 default: break;
13828 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13829 Subtract = true;
13830 [[fallthrough]];
13831 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13832 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13833 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13834 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13835 break;
13836 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13837 Subtract = true;
13838 [[fallthrough]];
13839 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13840 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13841 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13842 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13843 break;
13844 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13845 Subtract = true;
13846 [[fallthrough]];
13847 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13848 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13849 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13850 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13851 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13852 Subtract = true;
13853 [[fallthrough]];
13854 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13855 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13856 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13857 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13858 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13859 Subtract = true;
13860 [[fallthrough]];
13861 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13862 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13863 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13864 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13865 break;
13866 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13867 Subtract = true;
13868 [[fallthrough]];
13869 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13870 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13871 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13872 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13873 break;
13874 }
13875
13876 Value *A = Ops[0];
13877 Value *B = Ops[1];
13878 Value *C = Ops[2];
13879
13880 if (Subtract)
13881 C = CGF.Builder.CreateFNeg(C);
13882
13883 Value *Res;
13884
13885 // Use the target intrinsic for add/sub forms or when the rounding mode is not _MM_FROUND_CUR_DIRECTION/4; otherwise emit a generic FMA.
13886 if (IID != Intrinsic::not_intrinsic &&
13887 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13888 IsAddSub)) {
13889 Function *Intr = CGF.CGM.getIntrinsic(IID);
13890 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13891 } else {
13892 llvm::Type *Ty = A->getType();
13893 Function *FMA;
13894 if (CGF.Builder.getIsFPConstrained()) {
13895 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13896 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13897 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13898 } else {
13899 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13900 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13901 }
13902 }
13903
13904 // Handle any required masking.
13905 Value *MaskFalseVal = nullptr;
13906 switch (BuiltinID) {
13907 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13908 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13909 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13910 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13911 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13912 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13913 MaskFalseVal = Ops[0];
13914 break;
13915 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13916 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13917 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13918 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13919 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13920 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13921 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13922 break;
13923 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13924 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13925 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13926 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13927 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13928 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13929 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13930 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13931 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13932 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13933 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13934 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13935 MaskFalseVal = Ops[2];
13936 break;
13937 }
13938
13939 if (MaskFalseVal)
13940 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
13941
13942 return Res;
13943}
13944
13945static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13946 MutableArrayRef<Value *> Ops, Value *Upper,
13947 bool ZeroMask = false, unsigned PTIdx = 0,
13948 bool NegAcc = false) {
13949 unsigned Rnd = 4;
13950 if (Ops.size() > 4)
13951 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
13952
13953 if (NegAcc)
13954 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
13955
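  // These scalar FMA forms operate on element 0 only: pull the scalars out of
  // the vector operands, emit the FMA, and re-insert the result into Upper at
  // the end.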
13956 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
13957 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
13958 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
13959 Value *Res;
13960 if (Rnd != 4) {
13961 Intrinsic::ID IID;
13962
13963 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13964 case 16:
13965 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13966 break;
13967 case 32:
13968 IID = Intrinsic::x86_avx512_vfmadd_f32;
13969 break;
13970 case 64:
13971 IID = Intrinsic::x86_avx512_vfmadd_f64;
13972 break;
13973 default:
13974 llvm_unreachable("Unexpected size");
13975 }
13976 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13977 {Ops[0], Ops[1], Ops[2], Ops[4]});
13978 } else if (CGF.Builder.getIsFPConstrained()) {
13979 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13980 Function *FMA = CGF.CGM.getIntrinsic(
13981 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13982 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
13983 } else {
13984 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13985 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
13986 }
13987 // If we have more than 3 arguments, we need to do masking.
13988 if (Ops.size() > 3) {
13989 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13990 : Ops[PTIdx];
13991
13992 // If we negated the accumulator and it's the PassThru value, we need to
13993 // bypass the negate. Conveniently, Upper should be the same thing in this
13994 // case.
13995 if (NegAcc && PTIdx == 2)
13996 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
13997
13998 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
13999 }
14000 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14001}
14002
14003static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14004 ArrayRef<Value *> Ops) {
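  // pmuldq/pmuludq multiply the low 32 bits of each 64-bit element. Model this
  // as a full 64-bit multiply after sign- or zero-extending the low halves
  // within the vXi64 domain.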
14005 llvm::Type *Ty = Ops[0]->getType();
14006 // Arguments have a vXi32 type so cast to vXi64.
14007 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14008 Ty->getPrimitiveSizeInBits() / 64);
14009 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14010 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14011
14012 if (IsSigned) {
14013 // Shift left then arithmetic shift right.
14014 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14015 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14016 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14017 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14018 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14019 } else {
14020 // Clear the upper bits.
14021 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14022 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14023 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14024 }
14025
14026 return CGF.Builder.CreateMul(LHS, RHS);
14027}
14028
14029// Emit a masked pternlog intrinsic. This only exists because the header has to
14030// use a macro and we aren't able to pass the input argument to a pternlog
14031// builtin and a select builtin without evaluating it twice.
14032static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14033 ArrayRef<Value *> Ops) {
14034 llvm::Type *Ty = Ops[0]->getType();
14035
14036 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14037 unsigned EltWidth = Ty->getScalarSizeInBits();
14038 Intrinsic::ID IID;
14039 if (VecWidth == 128 && EltWidth == 32)
14040 IID = Intrinsic::x86_avx512_pternlog_d_128;
14041 else if (VecWidth == 256 && EltWidth == 32)
14042 IID = Intrinsic::x86_avx512_pternlog_d_256;
14043 else if (VecWidth == 512 && EltWidth == 32)
14044 IID = Intrinsic::x86_avx512_pternlog_d_512;
14045 else if (VecWidth == 128 && EltWidth == 64)
14046 IID = Intrinsic::x86_avx512_pternlog_q_128;
14047 else if (VecWidth == 256 && EltWidth == 64)
14048 IID = Intrinsic::x86_avx512_pternlog_q_256;
14049 else if (VecWidth == 512 && EltWidth == 64)
14050 IID = Intrinsic::x86_avx512_pternlog_q_512;
14051 else
14052 llvm_unreachable("Unexpected intrinsic");
14053
14054 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14055 Ops.drop_back());
14056 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14057 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14058}
14059
14060static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14061 llvm::Type *DstTy) {
14062 unsigned NumberOfElements =
14063 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14064 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14065 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14066}
14067
14068Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14069 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14070 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14071 return EmitX86CpuIs(CPUStr);
14072}
14073
14074// Convert F16 halves to floats.
14075static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14076 ArrayRef<Value *> Ops,
14077 llvm::Type *DstTy) {
14078 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14079 "Unknown cvtph2ps intrinsic");
14080
14081 // If the SAE intrinsic doesn't use the default rounding, keep the target-specific intrinsic instead of lowering to a plain fpext.
14082 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14083 Function *F =
14084 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14085 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14086 }
14087
14088 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14089 Value *Src = Ops[0];
14090
14091 // Extract the subvector.
14092 if (NumDstElts !=
14093 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14094 assert(NumDstElts == 4 && "Unexpected vector size");
14095 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14096 }
14097
14098 // Bitcast from vXi16 to vXf16.
14099 auto *HalfTy = llvm::FixedVectorType::get(
14100 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14101 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14102
14103 // Perform the fp-extension.
14104 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14105
14106 if (Ops.size() >= 3)
14107 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14108 return Res;
14109}
14110
14111Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14112
14113 llvm::Type *Int32Ty = Builder.getInt32Ty();
14114
14115 // Matching the struct layout from the compiler-rt/libgcc structure that is
14116 // filled in:
14117 // unsigned int __cpu_vendor;
14118 // unsigned int __cpu_type;
14119 // unsigned int __cpu_subtype;
14120 // unsigned int __cpu_features[1];
14121 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14122 llvm::ArrayType::get(Int32Ty, 1));
14123
14124 // Grab the global __cpu_model.
14125 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14126 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14127
14128 // Calculate which __cpu_model field (vendor, type, or subtype) the string
14129 // selects, together with the value we expect to find there.
14130 unsigned Index;
14131 unsigned Value;
14132 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14133#define X86_VENDOR(ENUM, STRING) \
14134 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14135#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14136 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14137#define X86_CPU_TYPE(ENUM, STR) \
14138 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14139#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14140 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14141#define X86_CPU_SUBTYPE(ENUM, STR) \
14142 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14143#include "llvm/TargetParser/X86TargetParser.def"
14144 .Default({0, 0});
14145 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14146
14147 // Grab the appropriate field from __cpu_model.
14148 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14149 ConstantInt::get(Int32Ty, Index)};
14150 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
14151 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14152 CharUnits::fromQuantity(4).getAsAlign());
14153
14154 // Check the value of the field against the requested value.
14155 return Builder.CreateICmpEQ(CpuValue,
14156 llvm::ConstantInt::get(Int32Ty, Value));
14157}
14158
14159Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14160 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14161 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14162 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14163 return Builder.getFalse();
14164 return EmitX86CpuSupports(FeatureStr);
14165}
14166
14167Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14168 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14169}
14170
14171llvm::Value *
14172CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14173 Value *Result = Builder.getTrue();
14174 if (FeatureMask[0] != 0) {
14175 // Matching the struct layout from the compiler-rt/libgcc structure that is
14176 // filled in:
14177 // unsigned int __cpu_vendor;
14178 // unsigned int __cpu_type;
14179 // unsigned int __cpu_subtype;
14180 // unsigned int __cpu_features[1];
14181 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14182 llvm::ArrayType::get(Int32Ty, 1));
14183
14184 // Grab the global __cpu_model.
14185 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14186 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14187
14188 // Grab the first (0th) element of the __cpu_features field from the
14189 // global __cpu_model of type STy.
14190 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14191 Builder.getInt32(0)};
14192 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
14193 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14194 CharUnits::fromQuantity(4).getAsAlign());
14195
14196 // Check the value of the bit corresponding to the feature requested.
14197 Value *Mask = Builder.getInt32(FeatureMask[0]);
14198 Value *Bitset = Builder.CreateAnd(Features, Mask);
14199 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14200 Result = Builder.CreateAnd(Result, Cmp);
14201 }
14202
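  // Feature bits beyond the first word live in the separate __cpu_features2
  // array provided by compiler-rt/libgcc.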
14203 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14204 llvm::Constant *CpuFeatures2 =
14205 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14206 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14207 for (int i = 1; i != 4; ++i) {
14208 const uint32_t M = FeatureMask[i];
14209 if (!M)
14210 continue;
14211 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14212 Value *Features = Builder.CreateAlignedLoad(
14213 Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
14214 CharUnits::fromQuantity(4).getAsAlign());
14215 // Check the value of the bit corresponding to the feature requested.
14216 Value *Mask = Builder.getInt32(M);
14217 Value *Bitset = Builder.CreateAnd(Features, Mask);
14218 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14219 Result = Builder.CreateAnd(Result, Cmp);
14220 }
14221
14222 return Result;
14223}
14224
14225Value *CodeGenFunction::EmitAArch64CpuInit() {
14226 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14227 llvm::FunctionCallee Func =
14228 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14229 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14230 cast<llvm::GlobalValue>(Func.getCallee())
14231 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14232 return Builder.CreateCall(Func);
14233}
14234
14235Value *CodeGenFunction::EmitX86CpuInit() {
14236 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14237 /*Variadic*/ false);
14238 llvm::FunctionCallee Func =
14239 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14240 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14241 cast<llvm::GlobalValue>(Func.getCallee())
14242 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14243 return Builder.CreateCall(Func);
14244}
14245
14246Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14247 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14248 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14249 llvm::SmallVector<StringRef, 8> Features;
14250 ArgStr.split(Features, "+");
14251 for (auto &Feature : Features) {
14252 Feature = Feature.trim();
14253 if (!llvm::AArch64::parseArchExtension(Feature))
14254 return Builder.getFalse();
14255 if (Feature != "default")
14256 Features.push_back(Feature);
14257 }
14258 return EmitAArch64CpuSupports(Features);
14259}
14260
14261llvm::Value *
14262CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14263 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14264 Value *Result = Builder.getTrue();
14265 if (FeaturesMask != 0) {
14266 // Get the features from the structure defined by the runtime library:
14267 // struct {
14268 // unsigned long long features;
14269 // } __aarch64_cpu_features;
14270 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14271 llvm::Constant *AArch64CPUFeatures =
14272 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14273 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14274 llvm::Value *CpuFeatures = Builder.CreateGEP(
14275 STy, AArch64CPUFeatures,
14276 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14277 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14278 CharUnits::fromQuantity(8).getAsAlign());
14279 Value *Mask = Builder.getInt64(FeaturesMask);
14280 Value *Bitset = Builder.CreateAnd(Features, Mask);
14281 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14282 Result = Builder.CreateAnd(Result, Cmp);
14283 }
14284 return Result;
14285}
14286
14287Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14288 const CallExpr *E) {
14289 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14290 return EmitX86CpuIs(E);
14291 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14292 return EmitX86CpuSupports(E);
14293 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14294 return EmitX86CpuInit();
14295
14296 // Handle MSVC intrinsics before argument evaluation to prevent double
14297 // evaluation.
14298 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14299 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14300
14301 SmallVector<Value*, 4> Ops;
14302 bool IsMaskFCmp = false;
14303 bool IsConjFMA = false;
14304
14305 // Find out if any arguments are required to be integer constant expressions.
14306 unsigned ICEArguments = 0;
14307 ASTContext::GetBuiltinTypeError Error;
14308 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14309 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14310
14311 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14312 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14313 }
14314
14315 // These exist so that the builtin that takes an immediate can be bounds-
14316 // checked by clang to avoid passing bad immediates to the backend. Since
14317 // AVX has a larger immediate than SSE, we would need separate builtins to
14318 // do the different bounds checking. Rather than create a clang-specific,
14319 // SSE-only builtin, this implements eight separate builtins to match the
14320 // gcc implementation.
14321 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14322 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14323 llvm::Function *F = CGM.getIntrinsic(ID);
14324 return Builder.CreateCall(F, Ops);
14325 };
14326
14327 // For the vector forms of FP comparisons, translate the builtins directly to
14328 // IR.
14329 // TODO: The builtins could be removed if the SSE header files used vector
14330 // extension comparisons directly (vector ordered/unordered may need
14331 // additional support via __builtin_isnan()).
14332 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14333 bool IsSignaling) {
14334 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14335 Value *Cmp;
14336 if (IsSignaling)
14337 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14338 else
14339 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14340 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14341 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14342 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14343 return Builder.CreateBitCast(Sext, FPVecTy);
14344 };
14345
14346 switch (BuiltinID) {
14347 default: return nullptr;
14348 case X86::BI_mm_prefetch: {
14349 Value *Address = Ops[0];
14350 ConstantInt *C = cast<ConstantInt>(Ops[1]);
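    // Decode the _MM_HINT value: bit 2 distinguishes write (1) from read (0)
    // prefetches, the low two bits give the temporal locality, and the final
    // constant 1 marks this as a data (not instruction) prefetch.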
14351 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14352 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14353 Value *Data = ConstantInt::get(Int32Ty, 1);
14354 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14355 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14356 }
14357 case X86::BI_mm_clflush: {
14358 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14359 Ops[0]);
14360 }
14361 case X86::BI_mm_lfence: {
14362 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14363 }
14364 case X86::BI_mm_mfence: {
14365 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14366 }
14367 case X86::BI_mm_sfence: {
14368 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14369 }
14370 case X86::BI_mm_pause: {
14371 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14372 }
14373 case X86::BI__rdtsc: {
14374 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14375 }
14376 case X86::BI__builtin_ia32_rdtscp: {
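    // llvm.x86.rdtscp returns {TSC, TSC_AUX}; store the aux value through the
    // pointer argument and return the time-stamp counter itself.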
14377 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14378 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14379 Ops[0]);
14380 return Builder.CreateExtractValue(Call, 0);
14381 }
14382 case X86::BI__builtin_ia32_lzcnt_u16:
14383 case X86::BI__builtin_ia32_lzcnt_u32:
14384 case X86::BI__builtin_ia32_lzcnt_u64: {
14385 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14386 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14387 }
14388 case X86::BI__builtin_ia32_tzcnt_u16:
14389 case X86::BI__builtin_ia32_tzcnt_u32:
14390 case X86::BI__builtin_ia32_tzcnt_u64: {
14391 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14392 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14393 }
14394 case X86::BI__builtin_ia32_undef128:
14395 case X86::BI__builtin_ia32_undef256:
14396 case X86::BI__builtin_ia32_undef512:
14397 // The x86 definition of "undef" is not the same as the LLVM definition
14398 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14399 // IR optimizer and backend.
14400 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14401 // value, we should use that here instead of a zero.
14402 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14403 case X86::BI__builtin_ia32_vec_init_v8qi:
14404 case X86::BI__builtin_ia32_vec_init_v4hi:
14405 case X86::BI__builtin_ia32_vec_init_v2si:
14406 return Builder.CreateBitCast(BuildVector(Ops),
14407 llvm::Type::getX86_MMXTy(getLLVMContext()));
14408 case X86::BI__builtin_ia32_vec_ext_v2si:
14409 case X86::BI__builtin_ia32_vec_ext_v16qi:
14410 case X86::BI__builtin_ia32_vec_ext_v8hi:
14411 case X86::BI__builtin_ia32_vec_ext_v4si:
14412 case X86::BI__builtin_ia32_vec_ext_v4sf:
14413 case X86::BI__builtin_ia32_vec_ext_v2di:
14414 case X86::BI__builtin_ia32_vec_ext_v32qi:
14415 case X86::BI__builtin_ia32_vec_ext_v16hi:
14416 case X86::BI__builtin_ia32_vec_ext_v8si:
14417 case X86::BI__builtin_ia32_vec_ext_v4di: {
14418 unsigned NumElts =
14419 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14420 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14421 Index &= NumElts - 1;
14422 // These builtins exist so we can ensure the index is an ICE and in range.
14423 // Otherwise we could just do this in the header file.
14424 return Builder.CreateExtractElement(Ops[0], Index);
14425 }
14426 case X86::BI__builtin_ia32_vec_set_v16qi:
14427 case X86::BI__builtin_ia32_vec_set_v8hi:
14428 case X86::BI__builtin_ia32_vec_set_v4si:
14429 case X86::BI__builtin_ia32_vec_set_v2di:
14430 case X86::BI__builtin_ia32_vec_set_v32qi:
14431 case X86::BI__builtin_ia32_vec_set_v16hi:
14432 case X86::BI__builtin_ia32_vec_set_v8si:
14433 case X86::BI__builtin_ia32_vec_set_v4di: {
14434 unsigned NumElts =
14435 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14436 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14437 Index &= NumElts - 1;
14438 // These builtins exist so we can ensure the index is an ICE and in range.
14439 // Otherwise we could just do this in the header file.
14440 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14441 }
14442 case X86::BI_mm_setcsr:
14443 case X86::BI__builtin_ia32_ldmxcsr: {
14444 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14445 Builder.CreateStore(Ops[0], Tmp);
14446 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14447 Tmp.getPointer());
14448 }
14449 case X86::BI_mm_getcsr:
14450 case X86::BI__builtin_ia32_stmxcsr: {
14451 RawAddress Tmp = CreateMemTemp(E->getType());
14452 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14453 Tmp.getPointer());
14454 return Builder.CreateLoad(Tmp, "stmxcsr");
14455 }
14456 case X86::BI__builtin_ia32_xsave:
14457 case X86::BI__builtin_ia32_xsave64:
14458 case X86::BI__builtin_ia32_xrstor:
14459 case X86::BI__builtin_ia32_xrstor64:
14460 case X86::BI__builtin_ia32_xsaveopt:
14461 case X86::BI__builtin_ia32_xsaveopt64:
14462 case X86::BI__builtin_ia32_xrstors:
14463 case X86::BI__builtin_ia32_xrstors64:
14464 case X86::BI__builtin_ia32_xsavec:
14465 case X86::BI__builtin_ia32_xsavec64:
14466 case X86::BI__builtin_ia32_xsaves:
14467 case X86::BI__builtin_ia32_xsaves64:
14468 case X86::BI__builtin_ia32_xsetbv:
14469 case X86::BI_xsetbv: {
14470 Intrinsic::ID ID;
14471#define INTRINSIC_X86_XSAVE_ID(NAME) \
14472 case X86::BI__builtin_ia32_##NAME: \
14473 ID = Intrinsic::x86_##NAME; \
14474 break
14475 switch (BuiltinID) {
14476 default: llvm_unreachable("Unsupported intrinsic!");
14477 INTRINSIC_X86_XSAVE_ID(xsave);
14478 INTRINSIC_X86_XSAVE_ID(xsave64);
14479 INTRINSIC_X86_XSAVE_ID(xrstor);
14480 INTRINSIC_X86_XSAVE_ID(xrstor64);
14481 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14482 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14483 INTRINSIC_X86_XSAVE_ID(xrstors);
14484 INTRINSIC_X86_XSAVE_ID(xrstors64);
14485 INTRINSIC_X86_XSAVE_ID(xsavec);
14486 INTRINSIC_X86_XSAVE_ID(xsavec64);
14487 INTRINSIC_X86_XSAVE_ID(xsaves);
14488 INTRINSIC_X86_XSAVE_ID(xsaves64);
14489 INTRINSIC_X86_XSAVE_ID(xsetbv);
14490 case X86::BI_xsetbv:
14491 ID = Intrinsic::x86_xsetbv;
14492 break;
14493 }
14494#undef INTRINSIC_X86_XSAVE_ID
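    // The xsave family of intrinsics takes the 64-bit feature mask as two i32
    // halves (high word in EDX, low word in EAX), so split Ops[1] accordingly.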
14495 Value *Mhi = Builder.CreateTrunc(
14496 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14497 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14498 Ops[1] = Mhi;
14499 Ops.push_back(Mlo);
14500 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14501 }
14502 case X86::BI__builtin_ia32_xgetbv:
14503 case X86::BI_xgetbv:
14504 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14505 case X86::BI__builtin_ia32_storedqudi128_mask:
14506 case X86::BI__builtin_ia32_storedqusi128_mask:
14507 case X86::BI__builtin_ia32_storedquhi128_mask:
14508 case X86::BI__builtin_ia32_storedquqi128_mask:
14509 case X86::BI__builtin_ia32_storeupd128_mask:
14510 case X86::BI__builtin_ia32_storeups128_mask:
14511 case X86::BI__builtin_ia32_storedqudi256_mask:
14512 case X86::BI__builtin_ia32_storedqusi256_mask:
14513 case X86::BI__builtin_ia32_storedquhi256_mask:
14514 case X86::BI__builtin_ia32_storedquqi256_mask:
14515 case X86::BI__builtin_ia32_storeupd256_mask:
14516 case X86::BI__builtin_ia32_storeups256_mask:
14517 case X86::BI__builtin_ia32_storedqudi512_mask:
14518 case X86::BI__builtin_ia32_storedqusi512_mask:
14519 case X86::BI__builtin_ia32_storedquhi512_mask:
14520 case X86::BI__builtin_ia32_storedquqi512_mask:
14521 case X86::BI__builtin_ia32_storeupd512_mask:
14522 case X86::BI__builtin_ia32_storeups512_mask:
14523 return EmitX86MaskedStore(*this, Ops, Align(1));
14524
14525 case X86::BI__builtin_ia32_storesh128_mask:
14526 case X86::BI__builtin_ia32_storess128_mask:
14527 case X86::BI__builtin_ia32_storesd128_mask:
14528 return EmitX86MaskedStore(*this, Ops, Align(1));
14529
14530 case X86::BI__builtin_ia32_vpopcntb_128:
14531 case X86::BI__builtin_ia32_vpopcntd_128:
14532 case X86::BI__builtin_ia32_vpopcntq_128:
14533 case X86::BI__builtin_ia32_vpopcntw_128:
14534 case X86::BI__builtin_ia32_vpopcntb_256:
14535 case X86::BI__builtin_ia32_vpopcntd_256:
14536 case X86::BI__builtin_ia32_vpopcntq_256:
14537 case X86::BI__builtin_ia32_vpopcntw_256:
14538 case X86::BI__builtin_ia32_vpopcntb_512:
14539 case X86::BI__builtin_ia32_vpopcntd_512:
14540 case X86::BI__builtin_ia32_vpopcntq_512:
14541 case X86::BI__builtin_ia32_vpopcntw_512: {
14542 llvm::Type *ResultType = ConvertType(E->getType());
14543 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14544 return Builder.CreateCall(F, Ops);
14545 }
14546 case X86::BI__builtin_ia32_cvtmask2b128:
14547 case X86::BI__builtin_ia32_cvtmask2b256:
14548 case X86::BI__builtin_ia32_cvtmask2b512:
14549 case X86::BI__builtin_ia32_cvtmask2w128:
14550 case X86::BI__builtin_ia32_cvtmask2w256:
14551 case X86::BI__builtin_ia32_cvtmask2w512:
14552 case X86::BI__builtin_ia32_cvtmask2d128:
14553 case X86::BI__builtin_ia32_cvtmask2d256:
14554 case X86::BI__builtin_ia32_cvtmask2d512:
14555 case X86::BI__builtin_ia32_cvtmask2q128:
14556 case X86::BI__builtin_ia32_cvtmask2q256:
14557 case X86::BI__builtin_ia32_cvtmask2q512:
14558 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14559
14560 case X86::BI__builtin_ia32_cvtb2mask128:
14561 case X86::BI__builtin_ia32_cvtb2mask256:
14562 case X86::BI__builtin_ia32_cvtb2mask512:
14563 case X86::BI__builtin_ia32_cvtw2mask128:
14564 case X86::BI__builtin_ia32_cvtw2mask256:
14565 case X86::BI__builtin_ia32_cvtw2mask512:
14566 case X86::BI__builtin_ia32_cvtd2mask128:
14567 case X86::BI__builtin_ia32_cvtd2mask256:
14568 case X86::BI__builtin_ia32_cvtd2mask512:
14569 case X86::BI__builtin_ia32_cvtq2mask128:
14570 case X86::BI__builtin_ia32_cvtq2mask256:
14571 case X86::BI__builtin_ia32_cvtq2mask512:
14572 return EmitX86ConvertToMask(*this, Ops[0]);
14573
14574 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14575 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14576 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14577 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14578 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14579 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14580 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14581 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14582 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14583 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14584 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14585 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14586 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14587 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14588
14589 case X86::BI__builtin_ia32_vfmaddss3:
14590 case X86::BI__builtin_ia32_vfmaddsd3:
14591 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14592 case X86::BI__builtin_ia32_vfmaddss3_mask:
14593 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14594 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14595 case X86::BI__builtin_ia32_vfmaddss:
14596 case X86::BI__builtin_ia32_vfmaddsd:
14597 return EmitScalarFMAExpr(*this, E, Ops,
14598 Constant::getNullValue(Ops[0]->getType()));
14599 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14600 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14601 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14602 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14603 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14604 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14605 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14606 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14607 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14608 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14609 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14610 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14611 /*NegAcc*/ true);
14612 case X86::BI__builtin_ia32_vfmaddph:
14613 case X86::BI__builtin_ia32_vfmaddps:
14614 case X86::BI__builtin_ia32_vfmaddpd:
14615 case X86::BI__builtin_ia32_vfmaddph256:
14616 case X86::BI__builtin_ia32_vfmaddps256:
14617 case X86::BI__builtin_ia32_vfmaddpd256:
14618 case X86::BI__builtin_ia32_vfmaddph512_mask:
14619 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14620 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14621 case X86::BI__builtin_ia32_vfmaddps512_mask:
14622 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14623 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14624 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14625 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14626 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14627 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14628 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14629 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14630 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14631 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14632 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14633 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14634 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14635 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14636 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14637 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14638 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14639 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14640 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14641 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14642 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14643 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14644
14645 case X86::BI__builtin_ia32_movdqa32store128_mask:
14646 case X86::BI__builtin_ia32_movdqa64store128_mask:
14647 case X86::BI__builtin_ia32_storeaps128_mask:
14648 case X86::BI__builtin_ia32_storeapd128_mask:
14649 case X86::BI__builtin_ia32_movdqa32store256_mask:
14650 case X86::BI__builtin_ia32_movdqa64store256_mask:
14651 case X86::BI__builtin_ia32_storeaps256_mask:
14652 case X86::BI__builtin_ia32_storeapd256_mask:
14653 case X86::BI__builtin_ia32_movdqa32store512_mask:
14654 case X86::BI__builtin_ia32_movdqa64store512_mask:
14655 case X86::BI__builtin_ia32_storeaps512_mask:
14656 case X86::BI__builtin_ia32_storeapd512_mask:
14657 return EmitX86MaskedStore(
14658 *this, Ops,
14659 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14660
14661 case X86::BI__builtin_ia32_loadups128_mask:
14662 case X86::BI__builtin_ia32_loadups256_mask:
14663 case X86::BI__builtin_ia32_loadups512_mask:
14664 case X86::BI__builtin_ia32_loadupd128_mask:
14665 case X86::BI__builtin_ia32_loadupd256_mask:
14666 case X86::BI__builtin_ia32_loadupd512_mask:
14667 case X86::BI__builtin_ia32_loaddquqi128_mask:
14668 case X86::BI__builtin_ia32_loaddquqi256_mask:
14669 case X86::BI__builtin_ia32_loaddquqi512_mask:
14670 case X86::BI__builtin_ia32_loaddquhi128_mask:
14671 case X86::BI__builtin_ia32_loaddquhi256_mask:
14672 case X86::BI__builtin_ia32_loaddquhi512_mask:
14673 case X86::BI__builtin_ia32_loaddqusi128_mask:
14674 case X86::BI__builtin_ia32_loaddqusi256_mask:
14675 case X86::BI__builtin_ia32_loaddqusi512_mask:
14676 case X86::BI__builtin_ia32_loaddqudi128_mask:
14677 case X86::BI__builtin_ia32_loaddqudi256_mask:
14678 case X86::BI__builtin_ia32_loaddqudi512_mask:
14679 return EmitX86MaskedLoad(*this, Ops, Align(1));
14680
14681 case X86::BI__builtin_ia32_loadsh128_mask:
14682 case X86::BI__builtin_ia32_loadss128_mask:
14683 case X86::BI__builtin_ia32_loadsd128_mask:
14684 return EmitX86MaskedLoad(*this, Ops, Align(1));
14685
14686 case X86::BI__builtin_ia32_loadaps128_mask:
14687 case X86::BI__builtin_ia32_loadaps256_mask:
14688 case X86::BI__builtin_ia32_loadaps512_mask:
14689 case X86::BI__builtin_ia32_loadapd128_mask:
14690 case X86::BI__builtin_ia32_loadapd256_mask:
14691 case X86::BI__builtin_ia32_loadapd512_mask:
14692 case X86::BI__builtin_ia32_movdqa32load128_mask:
14693 case X86::BI__builtin_ia32_movdqa32load256_mask:
14694 case X86::BI__builtin_ia32_movdqa32load512_mask:
14695 case X86::BI__builtin_ia32_movdqa64load128_mask:
14696 case X86::BI__builtin_ia32_movdqa64load256_mask:
14697 case X86::BI__builtin_ia32_movdqa64load512_mask:
14698 return EmitX86MaskedLoad(
14699 *this, Ops,
14700 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14701
14702 case X86::BI__builtin_ia32_expandloaddf128_mask:
14703 case X86::BI__builtin_ia32_expandloaddf256_mask:
14704 case X86::BI__builtin_ia32_expandloaddf512_mask:
14705 case X86::BI__builtin_ia32_expandloadsf128_mask:
14706 case X86::BI__builtin_ia32_expandloadsf256_mask:
14707 case X86::BI__builtin_ia32_expandloadsf512_mask:
14708 case X86::BI__builtin_ia32_expandloaddi128_mask:
14709 case X86::BI__builtin_ia32_expandloaddi256_mask:
14710 case X86::BI__builtin_ia32_expandloaddi512_mask:
14711 case X86::BI__builtin_ia32_expandloadsi128_mask:
14712 case X86::BI__builtin_ia32_expandloadsi256_mask:
14713 case X86::BI__builtin_ia32_expandloadsi512_mask:
14714 case X86::BI__builtin_ia32_expandloadhi128_mask:
14715 case X86::BI__builtin_ia32_expandloadhi256_mask:
14716 case X86::BI__builtin_ia32_expandloadhi512_mask:
14717 case X86::BI__builtin_ia32_expandloadqi128_mask:
14718 case X86::BI__builtin_ia32_expandloadqi256_mask:
14719 case X86::BI__builtin_ia32_expandloadqi512_mask:
14720 return EmitX86ExpandLoad(*this, Ops);
14721
14722 case X86::BI__builtin_ia32_compressstoredf128_mask:
14723 case X86::BI__builtin_ia32_compressstoredf256_mask:
14724 case X86::BI__builtin_ia32_compressstoredf512_mask:
14725 case X86::BI__builtin_ia32_compressstoresf128_mask:
14726 case X86::BI__builtin_ia32_compressstoresf256_mask:
14727 case X86::BI__builtin_ia32_compressstoresf512_mask:
14728 case X86::BI__builtin_ia32_compressstoredi128_mask:
14729 case X86::BI__builtin_ia32_compressstoredi256_mask:
14730 case X86::BI__builtin_ia32_compressstoredi512_mask:
14731 case X86::BI__builtin_ia32_compressstoresi128_mask:
14732 case X86::BI__builtin_ia32_compressstoresi256_mask:
14733 case X86::BI__builtin_ia32_compressstoresi512_mask:
14734 case X86::BI__builtin_ia32_compressstorehi128_mask:
14735 case X86::BI__builtin_ia32_compressstorehi256_mask:
14736 case X86::BI__builtin_ia32_compressstorehi512_mask:
14737 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14738 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14739 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14740 return EmitX86CompressStore(*this, Ops);
14741
14742 case X86::BI__builtin_ia32_expanddf128_mask:
14743 case X86::BI__builtin_ia32_expanddf256_mask:
14744 case X86::BI__builtin_ia32_expanddf512_mask:
14745 case X86::BI__builtin_ia32_expandsf128_mask:
14746 case X86::BI__builtin_ia32_expandsf256_mask:
14747 case X86::BI__builtin_ia32_expandsf512_mask:
14748 case X86::BI__builtin_ia32_expanddi128_mask:
14749 case X86::BI__builtin_ia32_expanddi256_mask:
14750 case X86::BI__builtin_ia32_expanddi512_mask:
14751 case X86::BI__builtin_ia32_expandsi128_mask:
14752 case X86::BI__builtin_ia32_expandsi256_mask:
14753 case X86::BI__builtin_ia32_expandsi512_mask:
14754 case X86::BI__builtin_ia32_expandhi128_mask:
14755 case X86::BI__builtin_ia32_expandhi256_mask:
14756 case X86::BI__builtin_ia32_expandhi512_mask:
14757 case X86::BI__builtin_ia32_expandqi128_mask:
14758 case X86::BI__builtin_ia32_expandqi256_mask:
14759 case X86::BI__builtin_ia32_expandqi512_mask:
14760 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14761
14762 case X86::BI__builtin_ia32_compressdf128_mask:
14763 case X86::BI__builtin_ia32_compressdf256_mask:
14764 case X86::BI__builtin_ia32_compressdf512_mask:
14765 case X86::BI__builtin_ia32_compresssf128_mask:
14766 case X86::BI__builtin_ia32_compresssf256_mask:
14767 case X86::BI__builtin_ia32_compresssf512_mask:
14768 case X86::BI__builtin_ia32_compressdi128_mask:
14769 case X86::BI__builtin_ia32_compressdi256_mask:
14770 case X86::BI__builtin_ia32_compressdi512_mask:
14771 case X86::BI__builtin_ia32_compresssi128_mask:
14772 case X86::BI__builtin_ia32_compresssi256_mask:
14773 case X86::BI__builtin_ia32_compresssi512_mask:
14774 case X86::BI__builtin_ia32_compresshi128_mask:
14775 case X86::BI__builtin_ia32_compresshi256_mask:
14776 case X86::BI__builtin_ia32_compresshi512_mask:
14777 case X86::BI__builtin_ia32_compressqi128_mask:
14778 case X86::BI__builtin_ia32_compressqi256_mask:
14779 case X86::BI__builtin_ia32_compressqi512_mask:
14780 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14781
14782 case X86::BI__builtin_ia32_gather3div2df:
14783 case X86::BI__builtin_ia32_gather3div2di:
14784 case X86::BI__builtin_ia32_gather3div4df:
14785 case X86::BI__builtin_ia32_gather3div4di:
14786 case X86::BI__builtin_ia32_gather3div4sf:
14787 case X86::BI__builtin_ia32_gather3div4si:
14788 case X86::BI__builtin_ia32_gather3div8sf:
14789 case X86::BI__builtin_ia32_gather3div8si:
14790 case X86::BI__builtin_ia32_gather3siv2df:
14791 case X86::BI__builtin_ia32_gather3siv2di:
14792 case X86::BI__builtin_ia32_gather3siv4df:
14793 case X86::BI__builtin_ia32_gather3siv4di:
14794 case X86::BI__builtin_ia32_gather3siv4sf:
14795 case X86::BI__builtin_ia32_gather3siv4si:
14796 case X86::BI__builtin_ia32_gather3siv8sf:
14797 case X86::BI__builtin_ia32_gather3siv8si:
14798 case X86::BI__builtin_ia32_gathersiv8df:
14799 case X86::BI__builtin_ia32_gathersiv16sf:
14800 case X86::BI__builtin_ia32_gatherdiv8df:
14801 case X86::BI__builtin_ia32_gatherdiv16sf:
14802 case X86::BI__builtin_ia32_gathersiv8di:
14803 case X86::BI__builtin_ia32_gathersiv16si:
14804 case X86::BI__builtin_ia32_gatherdiv8di:
14805 case X86::BI__builtin_ia32_gatherdiv16si: {
14806 Intrinsic::ID IID;
14807 switch (BuiltinID) {
14808 default: llvm_unreachable("Unexpected builtin");
14809 case X86::BI__builtin_ia32_gather3div2df:
14810 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14811 break;
14812 case X86::BI__builtin_ia32_gather3div2di:
14813 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14814 break;
14815 case X86::BI__builtin_ia32_gather3div4df:
14816 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14817 break;
14818 case X86::BI__builtin_ia32_gather3div4di:
14819 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14820 break;
14821 case X86::BI__builtin_ia32_gather3div4sf:
14822 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14823 break;
14824 case X86::BI__builtin_ia32_gather3div4si:
14825 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14826 break;
14827 case X86::BI__builtin_ia32_gather3div8sf:
14828 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14829 break;
14830 case X86::BI__builtin_ia32_gather3div8si:
14831 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14832 break;
14833 case X86::BI__builtin_ia32_gather3siv2df:
14834 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14835 break;
14836 case X86::BI__builtin_ia32_gather3siv2di:
14837 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14838 break;
14839 case X86::BI__builtin_ia32_gather3siv4df:
14840 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14841 break;
14842 case X86::BI__builtin_ia32_gather3siv4di:
14843 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14844 break;
14845 case X86::BI__builtin_ia32_gather3siv4sf:
14846 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14847 break;
14848 case X86::BI__builtin_ia32_gather3siv4si:
14849 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14850 break;
14851 case X86::BI__builtin_ia32_gather3siv8sf:
14852 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14853 break;
14854 case X86::BI__builtin_ia32_gather3siv8si:
14855 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14856 break;
14857 case X86::BI__builtin_ia32_gathersiv8df:
14858 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14859 break;
14860 case X86::BI__builtin_ia32_gathersiv16sf:
14861 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14862 break;
14863 case X86::BI__builtin_ia32_gatherdiv8df:
14864 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14865 break;
14866 case X86::BI__builtin_ia32_gatherdiv16sf:
14867 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14868 break;
14869 case X86::BI__builtin_ia32_gathersiv8di:
14870 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14871 break;
14872 case X86::BI__builtin_ia32_gathersiv16si:
14873 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14874 break;
14875 case X86::BI__builtin_ia32_gatherdiv8di:
14876 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14877 break;
14878 case X86::BI__builtin_ia32_gatherdiv16si:
14879 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14880 break;
14881 }
14882
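    // The mask has one bit per element actually gathered, i.e. the smaller of
    // the source/passthru vector and the index vector element counts.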
14883 unsigned MinElts = std::min(
14884 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14885 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14886 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14887 Function *Intr = CGM.getIntrinsic(IID);
14888 return Builder.CreateCall(Intr, Ops);
14889 }
14890
14891 case X86::BI__builtin_ia32_scattersiv8df:
14892 case X86::BI__builtin_ia32_scattersiv16sf:
14893 case X86::BI__builtin_ia32_scatterdiv8df:
14894 case X86::BI__builtin_ia32_scatterdiv16sf:
14895 case X86::BI__builtin_ia32_scattersiv8di:
14896 case X86::BI__builtin_ia32_scattersiv16si:
14897 case X86::BI__builtin_ia32_scatterdiv8di:
14898 case X86::BI__builtin_ia32_scatterdiv16si:
14899 case X86::BI__builtin_ia32_scatterdiv2df:
14900 case X86::BI__builtin_ia32_scatterdiv2di:
14901 case X86::BI__builtin_ia32_scatterdiv4df:
14902 case X86::BI__builtin_ia32_scatterdiv4di:
14903 case X86::BI__builtin_ia32_scatterdiv4sf:
14904 case X86::BI__builtin_ia32_scatterdiv4si:
14905 case X86::BI__builtin_ia32_scatterdiv8sf:
14906 case X86::BI__builtin_ia32_scatterdiv8si:
14907 case X86::BI__builtin_ia32_scattersiv2df:
14908 case X86::BI__builtin_ia32_scattersiv2di:
14909 case X86::BI__builtin_ia32_scattersiv4df:
14910 case X86::BI__builtin_ia32_scattersiv4di:
14911 case X86::BI__builtin_ia32_scattersiv4sf:
14912 case X86::BI__builtin_ia32_scattersiv4si:
14913 case X86::BI__builtin_ia32_scattersiv8sf:
14914 case X86::BI__builtin_ia32_scattersiv8si: {
14915 Intrinsic::ID IID;
14916 switch (BuiltinID) {
14917 default: llvm_unreachable("Unexpected builtin");
14918 case X86::BI__builtin_ia32_scattersiv8df:
14919 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14920 break;
14921 case X86::BI__builtin_ia32_scattersiv16sf:
14922 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14923 break;
14924 case X86::BI__builtin_ia32_scatterdiv8df:
14925 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14926 break;
14927 case X86::BI__builtin_ia32_scatterdiv16sf:
14928 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14929 break;
14930 case X86::BI__builtin_ia32_scattersiv8di:
14931 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14932 break;
14933 case X86::BI__builtin_ia32_scattersiv16si:
14934 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14935 break;
14936 case X86::BI__builtin_ia32_scatterdiv8di:
14937 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14938 break;
14939 case X86::BI__builtin_ia32_scatterdiv16si:
14940 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14941 break;
14942 case X86::BI__builtin_ia32_scatterdiv2df:
14943 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14944 break;
14945 case X86::BI__builtin_ia32_scatterdiv2di:
14946 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14947 break;
14948 case X86::BI__builtin_ia32_scatterdiv4df:
14949 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14950 break;
14951 case X86::BI__builtin_ia32_scatterdiv4di:
14952 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14953 break;
14954 case X86::BI__builtin_ia32_scatterdiv4sf:
14955 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14956 break;
14957 case X86::BI__builtin_ia32_scatterdiv4si:
14958 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14959 break;
14960 case X86::BI__builtin_ia32_scatterdiv8sf:
14961 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14962 break;
14963 case X86::BI__builtin_ia32_scatterdiv8si:
14964 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14965 break;
14966 case X86::BI__builtin_ia32_scattersiv2df:
14967 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14968 break;
14969 case X86::BI__builtin_ia32_scattersiv2di:
14970 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14971 break;
14972 case X86::BI__builtin_ia32_scattersiv4df:
14973 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14974 break;
14975 case X86::BI__builtin_ia32_scattersiv4di:
14976 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14977 break;
14978 case X86::BI__builtin_ia32_scattersiv4sf:
14979 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14980 break;
14981 case X86::BI__builtin_ia32_scattersiv4si:
14982 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14983 break;
14984 case X86::BI__builtin_ia32_scattersiv8sf:
14985 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14986 break;
14987 case X86::BI__builtin_ia32_scattersiv8si:
14988 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14989 break;
14990 }
14991
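    // As with the gathers above, the mask covers the smaller of the index and
    // data vector element counts.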
14992 unsigned MinElts = std::min(
14993 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
14994 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
14995 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
14996 Function *Intr = CGM.getIntrinsic(IID);
14997 return Builder.CreateCall(Intr, Ops);
14998 }
14999
15000 case X86::BI__builtin_ia32_vextractf128_pd256:
15001 case X86::BI__builtin_ia32_vextractf128_ps256:
15002 case X86::BI__builtin_ia32_vextractf128_si256:
15003 case X86::BI__builtin_ia32_extract128i256:
15004 case X86::BI__builtin_ia32_extractf64x4_mask:
15005 case X86::BI__builtin_ia32_extractf32x4_mask:
15006 case X86::BI__builtin_ia32_extracti64x4_mask:
15007 case X86::BI__builtin_ia32_extracti32x4_mask:
15008 case X86::BI__builtin_ia32_extractf32x8_mask:
15009 case X86::BI__builtin_ia32_extracti32x8_mask:
15010 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15011 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15012 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15013 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15014 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15015 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15016 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15017 unsigned NumElts = DstTy->getNumElements();
15018 unsigned SrcNumElts =
15019 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15020 unsigned SubVectors = SrcNumElts / NumElts;
15021 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15022 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15023 Index &= SubVectors - 1; // Remove any extra bits.
15024 Index *= NumElts;
15025
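    // Lower the subvector extract to a shuffle that selects NumElts
    // consecutive elements of Ops[0] starting at the scaled Index.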
15026 int Indices[16];
15027 for (unsigned i = 0; i != NumElts; ++i)
15028 Indices[i] = i + Index;
15029
15030 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15031 "extract");
15032
15033 if (Ops.size() == 4)
15034 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15035
15036 return Res;
15037 }
15038 case X86::BI__builtin_ia32_vinsertf128_pd256:
15039 case X86::BI__builtin_ia32_vinsertf128_ps256:
15040 case X86::BI__builtin_ia32_vinsertf128_si256:
15041 case X86::BI__builtin_ia32_insert128i256:
15042 case X86::BI__builtin_ia32_insertf64x4:
15043 case X86::BI__builtin_ia32_insertf32x4:
15044 case X86::BI__builtin_ia32_inserti64x4:
15045 case X86::BI__builtin_ia32_inserti32x4:
15046 case X86::BI__builtin_ia32_insertf32x8:
15047 case X86::BI__builtin_ia32_inserti32x8:
15048 case X86::BI__builtin_ia32_insertf32x4_256:
15049 case X86::BI__builtin_ia32_inserti32x4_256:
15050 case X86::BI__builtin_ia32_insertf64x2_256:
15051 case X86::BI__builtin_ia32_inserti64x2_256:
15052 case X86::BI__builtin_ia32_insertf64x2_512:
15053 case X86::BI__builtin_ia32_inserti64x2_512: {
15054 unsigned DstNumElts =
15055 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15056 unsigned SrcNumElts =
15057 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15058 unsigned SubVectors = DstNumElts / SrcNumElts;
15059 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15060 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15061 Index &= SubVectors - 1; // Remove any extra bits.
15062 Index *= SrcNumElts;
15063
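    // Two shuffles: first widen the subvector to the destination element
    // count, then blend it over Ops[0] starting at the scaled Index.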
15064 int Indices[16];
15065 for (unsigned i = 0; i != DstNumElts; ++i)
15066 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15067
15068 Value *Op1 = Builder.CreateShuffleVector(
15069 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15070
15071 for (unsigned i = 0; i != DstNumElts; ++i) {
15072 if (i >= Index && i < (Index + SrcNumElts))
15073 Indices[i] = (i - Index) + DstNumElts;
15074 else
15075 Indices[i] = i;
15076 }
15077
15078 return Builder.CreateShuffleVector(Ops[0], Op1,
15079 ArrayRef(Indices, DstNumElts), "insert");
15080 }
15081 case X86::BI__builtin_ia32_pmovqd512_mask:
15082 case X86::BI__builtin_ia32_pmovwb512_mask: {
15083 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15084 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15085 }
15086 case X86::BI__builtin_ia32_pmovdb512_mask:
15087 case X86::BI__builtin_ia32_pmovdw512_mask:
15088 case X86::BI__builtin_ia32_pmovqw512_mask: {
15089 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15090 if (C->isAllOnesValue())
15091 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15092
15093 Intrinsic::ID IID;
15094 switch (BuiltinID) {
15095 default: llvm_unreachable("Unsupported intrinsic!");
15096 case X86::BI__builtin_ia32_pmovdb512_mask:
15097 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15098 break;
15099 case X86::BI__builtin_ia32_pmovdw512_mask:
15100 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15101 break;
15102 case X86::BI__builtin_ia32_pmovqw512_mask:
15103 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15104 break;
15105 }
15106
15107 Function *Intr = CGM.getIntrinsic(IID);
15108 return Builder.CreateCall(Intr, Ops);
15109 }
15110 case X86::BI__builtin_ia32_pblendw128:
15111 case X86::BI__builtin_ia32_blendpd:
15112 case X86::BI__builtin_ia32_blendps:
15113 case X86::BI__builtin_ia32_blendpd256:
15114 case X86::BI__builtin_ia32_blendps256:
15115 case X86::BI__builtin_ia32_pblendw256:
15116 case X86::BI__builtin_ia32_pblendd128:
15117 case X86::BI__builtin_ia32_pblendd256: {
15118 unsigned NumElts =
15119 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15120 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15121
15122 int Indices[16];
15123 // If there are more than 8 elements, the immediate is used twice so make
15124 // sure we handle that.
15125 for (unsigned i = 0; i != NumElts; ++i)
15126 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
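// Illustrative example: for __builtin_ia32_blendps (NumElts == 4) with
// Imm == 0b0101 the indices are <4,1,6,3>, i.e. result elements 0 and 2
// are taken from Ops[1] and elements 1 and 3 from Ops[0].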
15127
15128 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15129 ArrayRef(Indices, NumElts), "blend");
15130 }
15131 case X86::BI__builtin_ia32_pshuflw:
15132 case X86::BI__builtin_ia32_pshuflw256:
15133 case X86::BI__builtin_ia32_pshuflw512: {
15134 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15135 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15136 unsigned NumElts = Ty->getNumElements();
15137
15138 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15139 Imm = (Imm & 0xff) * 0x01010101;
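// Multiplying the 8-bit immediate by 0x01010101 replicates it into all
// four bytes of Imm, so each 128-bit lane handled by the loop below can
// keep consuming two bits at a time without reloading the immediate.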
15140
15141 int Indices[32];
15142 for (unsigned l = 0; l != NumElts; l += 8) {
15143 for (unsigned i = 0; i != 4; ++i) {
15144 Indices[l + i] = l + (Imm & 3);
15145 Imm >>= 2;
15146 }
15147 for (unsigned i = 4; i != 8; ++i)
15148 Indices[l + i] = l + i;
15149 }
15150
15151 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15152 "pshuflw");
15153 }
15154 case X86::BI__builtin_ia32_pshufhw:
15155 case X86::BI__builtin_ia32_pshufhw256:
15156 case X86::BI__builtin_ia32_pshufhw512: {
15157 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15158 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15159 unsigned NumElts = Ty->getNumElements();
15160
15162 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15162 Imm = (Imm & 0xff) * 0x01010101;
15163
15164 int Indices[32];
15165 for (unsigned l = 0; l != NumElts; l += 8) {
15166 for (unsigned i = 0; i != 4; ++i)
15167 Indices[l + i] = l + i;
15168 for (unsigned i = 4; i != 8; ++i) {
15169 Indices[l + i] = l + 4 + (Imm & 3);
15170 Imm >>= 2;
15171 }
15172 }
15173
15174 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15175 "pshufhw");
15176 }
15177 case X86::BI__builtin_ia32_pshufd:
15178 case X86::BI__builtin_ia32_pshufd256:
15179 case X86::BI__builtin_ia32_pshufd512:
15180 case X86::BI__builtin_ia32_vpermilpd:
15181 case X86::BI__builtin_ia32_vpermilps:
15182 case X86::BI__builtin_ia32_vpermilpd256:
15183 case X86::BI__builtin_ia32_vpermilps256:
15184 case X86::BI__builtin_ia32_vpermilpd512:
15185 case X86::BI__builtin_ia32_vpermilps512: {
15186 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15187 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15188 unsigned NumElts = Ty->getNumElements();
15189 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15190 unsigned NumLaneElts = NumElts / NumLanes;
15191
15193 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15193 Imm = (Imm & 0xff) * 0x01010101;
15194
15195 int Indices[16];
15196 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15197 for (unsigned i = 0; i != NumLaneElts; ++i) {
15198 Indices[i + l] = (Imm % NumLaneElts) + l;
15199 Imm /= NumLaneElts;
15200 }
15201 }
15202
15203 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15204 "permil");
15205 }
15206 case X86::BI__builtin_ia32_shufpd:
15207 case X86::BI__builtin_ia32_shufpd256:
15208 case X86::BI__builtin_ia32_shufpd512:
15209 case X86::BI__builtin_ia32_shufps:
15210 case X86::BI__builtin_ia32_shufps256:
15211 case X86::BI__builtin_ia32_shufps512: {
15212 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15213 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15214 unsigned NumElts = Ty->getNumElements();
15215 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15216 unsigned NumLaneElts = NumElts / NumLanes;
15217
15219 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15219 Imm = (Imm & 0xff) * 0x01010101;
15220
15221 int Indices[16];
15222 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15223 for (unsigned i = 0; i != NumLaneElts; ++i) {
15224 unsigned Index = Imm % NumLaneElts;
15225 Imm /= NumLaneElts;
15226 if (i >= (NumLaneElts / 2))
15227 Index += NumElts;
15228 Indices[l + i] = l + Index;
15229 }
15230 }
15231
15232 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15233 ArrayRef(Indices, NumElts), "shufp");
15234 }
15235 case X86::BI__builtin_ia32_permdi256:
15236 case X86::BI__builtin_ia32_permdf256:
15237 case X86::BI__builtin_ia32_permdi512:
15238 case X86::BI__builtin_ia32_permdf512: {
15239 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15240 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15241 unsigned NumElts = Ty->getNumElements();
15242
15243 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15244 int Indices[8];
15245 for (unsigned l = 0; l != NumElts; l += 4)
15246 for (unsigned i = 0; i != 4; ++i)
15247 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
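// Illustrative example: Imm == 0x1B (binary 00011011) produces indices
// <3,2,1,0> within each 256-bit block, i.e. it reverses the four 64-bit
// elements of the block.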
15248
15249 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15250 "perm");
15251 }
15252 case X86::BI__builtin_ia32_palignr128:
15253 case X86::BI__builtin_ia32_palignr256:
15254 case X86::BI__builtin_ia32_palignr512: {
15255 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15256
15257 unsigned NumElts =
15258 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15259 assert(NumElts % 16 == 0);
15260
15261 // If palignr is shifting the pair of vectors more than the size of two
15262 // lanes, emit zero.
15263 if (ShiftVal >= 32)
15264 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15265
15266 // If palignr is shifting the pair of input vectors more than one lane,
15267 // but less than two lanes, convert to shifting in zeroes.
15268 if (ShiftVal > 16) {
15269 ShiftVal -= 16;
15270 Ops[1] = Ops[0];
15271 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15272 }
15273
15274 int Indices[64];
15275 // 256-bit palignr operates on 128-bit lanes so we need to handle that
15276 for (unsigned l = 0; l != NumElts; l += 16) {
15277 for (unsigned i = 0; i != 16; ++i) {
15278 unsigned Idx = ShiftVal + i;
15279 if (Idx >= 16)
15280 Idx += NumElts - 16; // End of lane, switch operand.
15281 Indices[l + i] = Idx + l;
15282 }
15283 }
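// Illustrative example: 128-bit palignr with ShiftVal == 4 produces
// indices <4,...,15,16,17,18,19>, i.e. bytes 4-15 of Ops[1] followed by
// bytes 0-3 of Ops[0] -- the concatenation Ops[0]:Ops[1] (Ops[0] in the
// high half) shifted right by four bytes.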
15284
15285 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15286 ArrayRef(Indices, NumElts), "palignr");
15287 }
15288 case X86::BI__builtin_ia32_alignd128:
15289 case X86::BI__builtin_ia32_alignd256:
15290 case X86::BI__builtin_ia32_alignd512:
15291 case X86::BI__builtin_ia32_alignq128:
15292 case X86::BI__builtin_ia32_alignq256:
15293 case X86::BI__builtin_ia32_alignq512: {
15294 unsigned NumElts =
15295 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15296 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15297
15298 // Mask the shift amount to the width of the vector.
15299 ShiftVal &= NumElts - 1;
15300
15301 int Indices[16];
15302 for (unsigned i = 0; i != NumElts; ++i)
15303 Indices[i] = i + ShiftVal;
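// Illustrative example: __builtin_ia32_alignd128 (NumElts == 4) with
// ShiftVal == 1 uses indices <1,2,3,4> on shuffle(Ops[1], Ops[0]):
// elements 1-3 of Ops[1] followed by element 0 of Ops[0], i.e. the
// concatenation Ops[0]:Ops[1] shifted right by one element.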
15304
15305 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15306 ArrayRef(Indices, NumElts), "valign");
15307 }
15308 case X86::BI__builtin_ia32_shuf_f32x4_256:
15309 case X86::BI__builtin_ia32_shuf_f64x2_256:
15310 case X86::BI__builtin_ia32_shuf_i32x4_256:
15311 case X86::BI__builtin_ia32_shuf_i64x2_256:
15312 case X86::BI__builtin_ia32_shuf_f32x4:
15313 case X86::BI__builtin_ia32_shuf_f64x2:
15314 case X86::BI__builtin_ia32_shuf_i32x4:
15315 case X86::BI__builtin_ia32_shuf_i64x2: {
15316 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15317 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15318 unsigned NumElts = Ty->getNumElements();
15319 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15320 unsigned NumLaneElts = NumElts / NumLanes;
15321
15322 int Indices[16];
15323 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15324 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15325 Imm /= NumLanes; // Discard the bits we just used.
15326 if (l >= (NumElts / 2))
15327 Index += NumElts; // Switch to other source.
15328 for (unsigned i = 0; i != NumLaneElts; ++i) {
15329 Indices[l + i] = Index + i;
15330 }
15331 }
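// Illustrative example: __builtin_ia32_shuf_f64x2_256 (two 128-bit
// lanes of two doubles) with Imm == 0b10 places lane 0 of Ops[0] in the
// low half of the result and lane 1 of Ops[1] in the high half.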
15332
15333 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15334 ArrayRef(Indices, NumElts), "shuf");
15335 }
15336
15337 case X86::BI__builtin_ia32_vperm2f128_pd256:
15338 case X86::BI__builtin_ia32_vperm2f128_ps256:
15339 case X86::BI__builtin_ia32_vperm2f128_si256:
15340 case X86::BI__builtin_ia32_permti256: {
15341 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15342 unsigned NumElts =
15343 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15344
15345 // This takes a very simple approach since there are two lanes and a
15346 // shuffle can have 2 inputs. So we reserve the first input for the first
15347 // lane and the second input for the second lane. This may result in
15348 // duplicate sources, but this can be dealt with in the backend.
15349
15350 Value *OutOps[2];
15351 int Indices[8];
15352 for (unsigned l = 0; l != 2; ++l) {
15353 // Determine the source for this lane.
15354 if (Imm & (1 << ((l * 4) + 3)))
15355 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15356 else if (Imm & (1 << ((l * 4) + 1)))
15357 OutOps[l] = Ops[1];
15358 else
15359 OutOps[l] = Ops[0];
15360
15361 for (unsigned i = 0; i != NumElts/2; ++i) {
15362 // Start with the i'th element of the source for this lane.
15363 unsigned Idx = (l * NumElts) + i;
15364 // If bit 0 of the immediate half is set, switch to the high half of
15365 // the source.
15366 if (Imm & (1 << (l * 4)))
15367 Idx += NumElts/2;
15368 Indices[(l * (NumElts/2)) + i] = Idx;
15369 }
15370 }
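// Illustrative example: with Imm == 0x21 both OutOps come from the
// inputs (no zeroing bits set); the low lane of the result is the high
// half of Ops[0] and the high lane is the low half of Ops[1].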
15371
15372 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15373 ArrayRef(Indices, NumElts), "vperm");
15374 }
15375
15376 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15377 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15378 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15379 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15380 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15381 // Builtin type is vXi64 so multiply by 8 to get bytes.
15382 unsigned NumElts = ResultType->getNumElements() * 8;
15383
15384 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15385 if (ShiftVal >= 16)
15386 return llvm::Constant::getNullValue(ResultType);
15387
15388 int Indices[64];
15389 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
15390 for (unsigned l = 0; l != NumElts; l += 16) {
15391 for (unsigned i = 0; i != 16; ++i) {
15392 unsigned Idx = NumElts + i - ShiftVal;
15393 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15394 Indices[l + i] = Idx + l;
15395 }
15396 }
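// Illustrative example: with ShiftVal == 3 on a 128-bit vector the
// indices are <13,14,15,16,...,28>; in shuffle(Zero, Cast) the first
// three pick zero bytes and the rest pick bytes 0-12 of the input,
// giving a left shift by three bytes with zeros shifted in.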
15397
15398 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15399 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15400 Value *Zero = llvm::Constant::getNullValue(VecTy);
15401 Value *SV = Builder.CreateShuffleVector(
15402 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15403 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15404 }
15405 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15406 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15407 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15408 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15409 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15410 // Builtin type is vXi64 so multiply by 8 to get bytes.
15411 unsigned NumElts = ResultType->getNumElements() * 8;
15412
15413 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15414 if (ShiftVal >= 16)
15415 return llvm::Constant::getNullValue(ResultType);
15416
15417 int Indices[64];
15418 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
15419 for (unsigned l = 0; l != NumElts; l += 16) {
15420 for (unsigned i = 0; i != 16; ++i) {
15421 unsigned Idx = i + ShiftVal;
15422 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15423 Indices[l + i] = Idx + l;
15424 }
15425 }
15426
15427 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15428 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15429 Value *Zero = llvm::Constant::getNullValue(VecTy);
15430 Value *SV = Builder.CreateShuffleVector(
15431 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15432 return Builder.CreateBitCast(SV, ResultType, "cast");
15433 }
15434 case X86::BI__builtin_ia32_kshiftliqi:
15435 case X86::BI__builtin_ia32_kshiftlihi:
15436 case X86::BI__builtin_ia32_kshiftlisi:
15437 case X86::BI__builtin_ia32_kshiftlidi: {
15438 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15439 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15440
15441 if (ShiftVal >= NumElts)
15442 return llvm::Constant::getNullValue(Ops[0]->getType());
15443
15444 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15445
15446 int Indices[64];
15447 for (unsigned i = 0; i != NumElts; ++i)
15448 Indices[i] = NumElts + i - ShiftVal;
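// Illustrative example: __builtin_ia32_kshiftliqi (8-bit mask) with
// ShiftVal == 2 uses indices <6,7,8,...,13> on shuffle(Zero, In):
// positions 0-1 take zero bits and positions 2-7 take bits 0-5 of the
// input, i.e. the mask shifted left by two.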
15449
15450 Value *Zero = llvm::Constant::getNullValue(In->getType());
15451 Value *SV = Builder.CreateShuffleVector(
15452 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15453 return Builder.CreateBitCast(SV, Ops[0]->getType());
15454 }
15455 case X86::BI__builtin_ia32_kshiftriqi:
15456 case X86::BI__builtin_ia32_kshiftrihi:
15457 case X86::BI__builtin_ia32_kshiftrisi:
15458 case X86::BI__builtin_ia32_kshiftridi: {
15459 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15460 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15461
15462 if (ShiftVal >= NumElts)
15463 return llvm::Constant::getNullValue(Ops[0]->getType());
15464
15465 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15466
15467 int Indices[64];
15468 for (unsigned i = 0; i != NumElts; ++i)
15469 Indices[i] = i + ShiftVal;
15470
15471 Value *Zero = llvm::Constant::getNullValue(In->getType());
15472 Value *SV = Builder.CreateShuffleVector(
15473 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15474 return Builder.CreateBitCast(SV, Ops[0]->getType());
15475 }
15476 case X86::BI__builtin_ia32_movnti:
15477 case X86::BI__builtin_ia32_movnti64:
15478 case X86::BI__builtin_ia32_movntsd:
15479 case X86::BI__builtin_ia32_movntss: {
15480 llvm::MDNode *Node = llvm::MDNode::get(
15481 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
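// The !nontemporal metadata node (i32 1) attached to the store below
// lets the backend select a streaming (non-temporal) store instruction.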
15482
15483 Value *Ptr = Ops[0];
15484 Value *Src = Ops[1];
15485
15486 // Extract the 0'th element of the source vector.
15487 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15488 BuiltinID == X86::BI__builtin_ia32_movntss)
15489 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15490
15491 // Unaligned nontemporal store of the scalar value.
15492 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15493 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15494 SI->setAlignment(llvm::Align(1));
15495 return SI;
15496 }
15497 // Rotate is a special case of funnel shift - 1st 2 args are the same.
15498 case X86::BI__builtin_ia32_vprotb:
15499 case X86::BI__builtin_ia32_vprotw:
15500 case X86::BI__builtin_ia32_vprotd:
15501 case X86::BI__builtin_ia32_vprotq:
15502 case X86::BI__builtin_ia32_vprotbi:
15503 case X86::BI__builtin_ia32_vprotwi:
15504 case X86::BI__builtin_ia32_vprotdi:
15505 case X86::BI__builtin_ia32_vprotqi:
15506 case X86::BI__builtin_ia32_prold128:
15507 case X86::BI__builtin_ia32_prold256:
15508 case X86::BI__builtin_ia32_prold512:
15509 case X86::BI__builtin_ia32_prolq128:
15510 case X86::BI__builtin_ia32_prolq256:
15511 case X86::BI__builtin_ia32_prolq512:
15512 case X86::BI__builtin_ia32_prolvd128:
15513 case X86::BI__builtin_ia32_prolvd256:
15514 case X86::BI__builtin_ia32_prolvd512:
15515 case X86::BI__builtin_ia32_prolvq128:
15516 case X86::BI__builtin_ia32_prolvq256:
15517 case X86::BI__builtin_ia32_prolvq512:
15518 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15519 case X86::BI__builtin_ia32_prord128:
15520 case X86::BI__builtin_ia32_prord256:
15521 case X86::BI__builtin_ia32_prord512:
15522 case X86::BI__builtin_ia32_prorq128:
15523 case X86::BI__builtin_ia32_prorq256:
15524 case X86::BI__builtin_ia32_prorq512:
15525 case X86::BI__builtin_ia32_prorvd128:
15526 case X86::BI__builtin_ia32_prorvd256:
15527 case X86::BI__builtin_ia32_prorvd512:
15528 case X86::BI__builtin_ia32_prorvq128:
15529 case X86::BI__builtin_ia32_prorvq256:
15530 case X86::BI__builtin_ia32_prorvq512:
15531 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
15532 case X86::BI__builtin_ia32_selectb_128:
15533 case X86::BI__builtin_ia32_selectb_256:
15534 case X86::BI__builtin_ia32_selectb_512:
15535 case X86::BI__builtin_ia32_selectw_128:
15536 case X86::BI__builtin_ia32_selectw_256:
15537 case X86::BI__builtin_ia32_selectw_512:
15538 case X86::BI__builtin_ia32_selectd_128:
15539 case X86::BI__builtin_ia32_selectd_256:
15540 case X86::BI__builtin_ia32_selectd_512:
15541 case X86::BI__builtin_ia32_selectq_128:
15542 case X86::BI__builtin_ia32_selectq_256:
15543 case X86::BI__builtin_ia32_selectq_512:
15544 case X86::BI__builtin_ia32_selectph_128:
15545 case X86::BI__builtin_ia32_selectph_256:
15546 case X86::BI__builtin_ia32_selectph_512:
15547 case X86::BI__builtin_ia32_selectpbf_128:
15548 case X86::BI__builtin_ia32_selectpbf_256:
15549 case X86::BI__builtin_ia32_selectpbf_512:
15550 case X86::BI__builtin_ia32_selectps_128:
15551 case X86::BI__builtin_ia32_selectps_256:
15552 case X86::BI__builtin_ia32_selectps_512:
15553 case X86::BI__builtin_ia32_selectpd_128:
15554 case X86::BI__builtin_ia32_selectpd_256:
15555 case X86::BI__builtin_ia32_selectpd_512:
15556 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15557 case X86::BI__builtin_ia32_selectsh_128:
15558 case X86::BI__builtin_ia32_selectsbf_128:
15559 case X86::BI__builtin_ia32_selectss_128:
15560 case X86::BI__builtin_ia32_selectsd_128: {
15561 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15562 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15563 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15564 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15565 }
15566 case X86::BI__builtin_ia32_cmpb128_mask:
15567 case X86::BI__builtin_ia32_cmpb256_mask:
15568 case X86::BI__builtin_ia32_cmpb512_mask:
15569 case X86::BI__builtin_ia32_cmpw128_mask:
15570 case X86::BI__builtin_ia32_cmpw256_mask:
15571 case X86::BI__builtin_ia32_cmpw512_mask:
15572 case X86::BI__builtin_ia32_cmpd128_mask:
15573 case X86::BI__builtin_ia32_cmpd256_mask:
15574 case X86::BI__builtin_ia32_cmpd512_mask:
15575 case X86::BI__builtin_ia32_cmpq128_mask:
15576 case X86::BI__builtin_ia32_cmpq256_mask:
15577 case X86::BI__builtin_ia32_cmpq512_mask: {
15578 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15579 return EmitX86MaskedCompare(*this, CC, true, Ops);
15580 }
15581 case X86::BI__builtin_ia32_ucmpb128_mask:
15582 case X86::BI__builtin_ia32_ucmpb256_mask:
15583 case X86::BI__builtin_ia32_ucmpb512_mask:
15584 case X86::BI__builtin_ia32_ucmpw128_mask:
15585 case X86::BI__builtin_ia32_ucmpw256_mask:
15586 case X86::BI__builtin_ia32_ucmpw512_mask:
15587 case X86::BI__builtin_ia32_ucmpd128_mask:
15588 case X86::BI__builtin_ia32_ucmpd256_mask:
15589 case X86::BI__builtin_ia32_ucmpd512_mask:
15590 case X86::BI__builtin_ia32_ucmpq128_mask:
15591 case X86::BI__builtin_ia32_ucmpq256_mask:
15592 case X86::BI__builtin_ia32_ucmpq512_mask: {
15593 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15594 return EmitX86MaskedCompare(*this, CC, false, Ops);
15595 }
15596 case X86::BI__builtin_ia32_vpcomb:
15597 case X86::BI__builtin_ia32_vpcomw:
15598 case X86::BI__builtin_ia32_vpcomd:
15599 case X86::BI__builtin_ia32_vpcomq:
15600 return EmitX86vpcom(*this, Ops, true);
15601 case X86::BI__builtin_ia32_vpcomub:
15602 case X86::BI__builtin_ia32_vpcomuw:
15603 case X86::BI__builtin_ia32_vpcomud:
15604 case X86::BI__builtin_ia32_vpcomuq:
15605 return EmitX86vpcom(*this, Ops, false);
15606
15607 case X86::BI__builtin_ia32_kortestcqi:
15608 case X86::BI__builtin_ia32_kortestchi:
15609 case X86::BI__builtin_ia32_kortestcsi:
15610 case X86::BI__builtin_ia32_kortestcdi: {
15611 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15612 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15613 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15614 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15615 }
15616 case X86::BI__builtin_ia32_kortestzqi:
15617 case X86::BI__builtin_ia32_kortestzhi:
15618 case X86::BI__builtin_ia32_kortestzsi:
15619 case X86::BI__builtin_ia32_kortestzdi: {
15620 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15621 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15622 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15623 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15624 }
15625
15626 case X86::BI__builtin_ia32_ktestcqi:
15627 case X86::BI__builtin_ia32_ktestzqi:
15628 case X86::BI__builtin_ia32_ktestchi:
15629 case X86::BI__builtin_ia32_ktestzhi:
15630 case X86::BI__builtin_ia32_ktestcsi:
15631 case X86::BI__builtin_ia32_ktestzsi:
15632 case X86::BI__builtin_ia32_ktestcdi:
15633 case X86::BI__builtin_ia32_ktestzdi: {
15634 Intrinsic::ID IID;
15635 switch (BuiltinID) {
15636 default: llvm_unreachable("Unsupported intrinsic!");
15637 case X86::BI__builtin_ia32_ktestcqi:
15638 IID = Intrinsic::x86_avx512_ktestc_b;
15639 break;
15640 case X86::BI__builtin_ia32_ktestzqi:
15641 IID = Intrinsic::x86_avx512_ktestz_b;
15642 break;
15643 case X86::BI__builtin_ia32_ktestchi:
15644 IID = Intrinsic::x86_avx512_ktestc_w;
15645 break;
15646 case X86::BI__builtin_ia32_ktestzhi:
15647 IID = Intrinsic::x86_avx512_ktestz_w;
15648 break;
15649 case X86::BI__builtin_ia32_ktestcsi:
15650 IID = Intrinsic::x86_avx512_ktestc_d;
15651 break;
15652 case X86::BI__builtin_ia32_ktestzsi:
15653 IID = Intrinsic::x86_avx512_ktestz_d;
15654 break;
15655 case X86::BI__builtin_ia32_ktestcdi:
15656 IID = Intrinsic::x86_avx512_ktestc_q;
15657 break;
15658 case X86::BI__builtin_ia32_ktestzdi:
15659 IID = Intrinsic::x86_avx512_ktestz_q;
15660 break;
15661 }
15662
15663 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15664 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15665 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15666 Function *Intr = CGM.getIntrinsic(IID);
15667 return Builder.CreateCall(Intr, {LHS, RHS});
15668 }
15669
15670 case X86::BI__builtin_ia32_kaddqi:
15671 case X86::BI__builtin_ia32_kaddhi:
15672 case X86::BI__builtin_ia32_kaddsi:
15673 case X86::BI__builtin_ia32_kadddi: {
15674 Intrinsic::ID IID;
15675 switch (BuiltinID) {
15676 default: llvm_unreachable("Unsupported intrinsic!");
15677 case X86::BI__builtin_ia32_kaddqi:
15678 IID = Intrinsic::x86_avx512_kadd_b;
15679 break;
15680 case X86::BI__builtin_ia32_kaddhi:
15681 IID = Intrinsic::x86_avx512_kadd_w;
15682 break;
15683 case X86::BI__builtin_ia32_kaddsi:
15684 IID = Intrinsic::x86_avx512_kadd_d;
15685 break;
15686 case X86::BI__builtin_ia32_kadddi:
15687 IID = Intrinsic::x86_avx512_kadd_q;
15688 break;
15689 }
15690
15691 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15692 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15693 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15694 Function *Intr = CGM.getIntrinsic(IID);
15695 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15696 return Builder.CreateBitCast(Res, Ops[0]->getType());
15697 }
15698 case X86::BI__builtin_ia32_kandqi:
15699 case X86::BI__builtin_ia32_kandhi:
15700 case X86::BI__builtin_ia32_kandsi:
15701 case X86::BI__builtin_ia32_kanddi:
15702 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15703 case X86::BI__builtin_ia32_kandnqi:
15704 case X86::BI__builtin_ia32_kandnhi:
15705 case X86::BI__builtin_ia32_kandnsi:
15706 case X86::BI__builtin_ia32_kandndi:
15707 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15708 case X86::BI__builtin_ia32_korqi:
15709 case X86::BI__builtin_ia32_korhi:
15710 case X86::BI__builtin_ia32_korsi:
15711 case X86::BI__builtin_ia32_kordi:
15712 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15713 case X86::BI__builtin_ia32_kxnorqi:
15714 case X86::BI__builtin_ia32_kxnorhi:
15715 case X86::BI__builtin_ia32_kxnorsi:
15716 case X86::BI__builtin_ia32_kxnordi:
15717 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15718 case X86::BI__builtin_ia32_kxorqi:
15719 case X86::BI__builtin_ia32_kxorhi:
15720 case X86::BI__builtin_ia32_kxorsi:
15721 case X86::BI__builtin_ia32_kxordi:
15722 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15723 case X86::BI__builtin_ia32_knotqi:
15724 case X86::BI__builtin_ia32_knothi:
15725 case X86::BI__builtin_ia32_knotsi:
15726 case X86::BI__builtin_ia32_knotdi: {
15727 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15728 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15729 return Builder.CreateBitCast(Builder.CreateNot(Res),
15730 Ops[0]->getType());
15731 }
15732 case X86::BI__builtin_ia32_kmovb:
15733 case X86::BI__builtin_ia32_kmovw:
15734 case X86::BI__builtin_ia32_kmovd:
15735 case X86::BI__builtin_ia32_kmovq: {
15736 // Bitcast to vXi1 type and then back to integer. This gets the mask
15737 // register type into the IR, but might be optimized out depending on
15738 // what's around it.
15739 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15740 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15741 return Builder.CreateBitCast(Res, Ops[0]->getType());
15742 }
15743
15744 case X86::BI__builtin_ia32_kunpckdi:
15745 case X86::BI__builtin_ia32_kunpcksi:
15746 case X86::BI__builtin_ia32_kunpckhi: {
15747 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15748 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15749 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15750 int Indices[64];
15751 for (unsigned i = 0; i != NumElts; ++i)
15752 Indices[i] = i;
15753
15754 // First extract half of each vector. This gives better codegen than
15755 // doing it in a single shuffle.
15756 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15757 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15758 // Concat the vectors.
15759 // NOTE: Operands are swapped to match the intrinsic definition.
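// Illustrative example: for __builtin_ia32_kunpckhi (16-bit masks) the
// result is effectively (Ops[0] & 0xff) << 8 | (Ops[1] & 0xff).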
15760 Value *Res =
15761 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15762 return Builder.CreateBitCast(Res, Ops[0]->getType());
15763 }
15764
15765 case X86::BI__builtin_ia32_vplzcntd_128:
15766 case X86::BI__builtin_ia32_vplzcntd_256:
15767 case X86::BI__builtin_ia32_vplzcntd_512:
15768 case X86::BI__builtin_ia32_vplzcntq_128:
15769 case X86::BI__builtin_ia32_vplzcntq_256:
15770 case X86::BI__builtin_ia32_vplzcntq_512: {
15771 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15772 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15773 }
15774 case X86::BI__builtin_ia32_sqrtss:
15775 case X86::BI__builtin_ia32_sqrtsd: {
15776 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15777 Function *F;
15778 if (Builder.getIsFPConstrained()) {
15779 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15780 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15781 A->getType());
15782 A = Builder.CreateConstrainedFPCall(F, {A});
15783 } else {
15784 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15785 A = Builder.CreateCall(F, {A});
15786 }
15787 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15788 }
15789 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15790 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15791 case X86::BI__builtin_ia32_sqrtss_round_mask: {
15792 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15793 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15794 // otherwise keep the intrinsic.
15795 if (CC != 4) {
15796 Intrinsic::ID IID;
15797
15798 switch (BuiltinID) {
15799 default:
15800 llvm_unreachable("Unsupported intrinsic!");
15801 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15802 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15803 break;
15804 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15805 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15806 break;
15807 case X86::BI__builtin_ia32_sqrtss_round_mask:
15808 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15809 break;
15810 }
15811 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15812 }
15813 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15814 Function *F;
15815 if (Builder.getIsFPConstrained()) {
15816 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15817 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15818 A->getType());
15819 A = Builder.CreateConstrainedFPCall(F, A);
15820 } else {
15821 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15822 A = Builder.CreateCall(F, A);
15823 }
15824 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15825 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15826 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15827 }
15828 case X86::BI__builtin_ia32_sqrtpd256:
15829 case X86::BI__builtin_ia32_sqrtpd:
15830 case X86::BI__builtin_ia32_sqrtps256:
15831 case X86::BI__builtin_ia32_sqrtps:
15832 case X86::BI__builtin_ia32_sqrtph256:
15833 case X86::BI__builtin_ia32_sqrtph:
15834 case X86::BI__builtin_ia32_sqrtph512:
15835 case X86::BI__builtin_ia32_sqrtps512:
15836 case X86::BI__builtin_ia32_sqrtpd512: {
15837 if (Ops.size() == 2) {
15838 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15839 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15840 // otherwise keep the intrinsic.
15841 if (CC != 4) {
15842 Intrinsic::ID IID;
15843
15844 switch (BuiltinID) {
15845 default:
15846 llvm_unreachable("Unsupported intrinsic!");
15847 case X86::BI__builtin_ia32_sqrtph512:
15848 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15849 break;
15850 case X86::BI__builtin_ia32_sqrtps512:
15851 IID = Intrinsic::x86_avx512_sqrt_ps_512;
15852 break;
15853 case X86::BI__builtin_ia32_sqrtpd512:
15854 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15855 break;
15856 }
15857 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15858 }
15859 }
15860 if (Builder.getIsFPConstrained()) {
15861 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15862 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15863 Ops[0]->getType());
15864 return Builder.CreateConstrainedFPCall(F, Ops[0]);
15865 } else {
15866 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15867 return Builder.CreateCall(F, Ops[0]);
15868 }
15869 }
15870
15871 case X86::BI__builtin_ia32_pmuludq128:
15872 case X86::BI__builtin_ia32_pmuludq256:
15873 case X86::BI__builtin_ia32_pmuludq512:
15874 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15875
15876 case X86::BI__builtin_ia32_pmuldq128:
15877 case X86::BI__builtin_ia32_pmuldq256:
15878 case X86::BI__builtin_ia32_pmuldq512:
15879 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15880
15881 case X86::BI__builtin_ia32_pternlogd512_mask:
15882 case X86::BI__builtin_ia32_pternlogq512_mask:
15883 case X86::BI__builtin_ia32_pternlogd128_mask:
15884 case X86::BI__builtin_ia32_pternlogd256_mask:
15885 case X86::BI__builtin_ia32_pternlogq128_mask:
15886 case X86::BI__builtin_ia32_pternlogq256_mask:
15887 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15888
15889 case X86::BI__builtin_ia32_pternlogd512_maskz:
15890 case X86::BI__builtin_ia32_pternlogq512_maskz:
15891 case X86::BI__builtin_ia32_pternlogd128_maskz:
15892 case X86::BI__builtin_ia32_pternlogd256_maskz:
15893 case X86::BI__builtin_ia32_pternlogq128_maskz:
15894 case X86::BI__builtin_ia32_pternlogq256_maskz:
15895 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15896
15897 case X86::BI__builtin_ia32_vpshldd128:
15898 case X86::BI__builtin_ia32_vpshldd256:
15899 case X86::BI__builtin_ia32_vpshldd512:
15900 case X86::BI__builtin_ia32_vpshldq128:
15901 case X86::BI__builtin_ia32_vpshldq256:
15902 case X86::BI__builtin_ia32_vpshldq512:
15903 case X86::BI__builtin_ia32_vpshldw128:
15904 case X86::BI__builtin_ia32_vpshldw256:
15905 case X86::BI__builtin_ia32_vpshldw512:
15906 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15907
15908 case X86::BI__builtin_ia32_vpshrdd128:
15909 case X86::BI__builtin_ia32_vpshrdd256:
15910 case X86::BI__builtin_ia32_vpshrdd512:
15911 case X86::BI__builtin_ia32_vpshrdq128:
15912 case X86::BI__builtin_ia32_vpshrdq256:
15913 case X86::BI__builtin_ia32_vpshrdq512:
15914 case X86::BI__builtin_ia32_vpshrdw128:
15915 case X86::BI__builtin_ia32_vpshrdw256:
15916 case X86::BI__builtin_ia32_vpshrdw512:
15917 // Ops 0 and 1 are swapped.
15918 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15919
15920 case X86::BI__builtin_ia32_vpshldvd128:
15921 case X86::BI__builtin_ia32_vpshldvd256:
15922 case X86::BI__builtin_ia32_vpshldvd512:
15923 case X86::BI__builtin_ia32_vpshldvq128:
15924 case X86::BI__builtin_ia32_vpshldvq256:
15925 case X86::BI__builtin_ia32_vpshldvq512:
15926 case X86::BI__builtin_ia32_vpshldvw128:
15927 case X86::BI__builtin_ia32_vpshldvw256:
15928 case X86::BI__builtin_ia32_vpshldvw512:
15929 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15930
15931 case X86::BI__builtin_ia32_vpshrdvd128:
15932 case X86::BI__builtin_ia32_vpshrdvd256:
15933 case X86::BI__builtin_ia32_vpshrdvd512:
15934 case X86::BI__builtin_ia32_vpshrdvq128:
15935 case X86::BI__builtin_ia32_vpshrdvq256:
15936 case X86::BI__builtin_ia32_vpshrdvq512:
15937 case X86::BI__builtin_ia32_vpshrdvw128:
15938 case X86::BI__builtin_ia32_vpshrdvw256:
15939 case X86::BI__builtin_ia32_vpshrdvw512:
15940 // Ops 0 and 1 are swapped.
15941 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15942
15943 // Reductions
15944 case X86::BI__builtin_ia32_reduce_fadd_pd512:
15945 case X86::BI__builtin_ia32_reduce_fadd_ps512:
15946 case X86::BI__builtin_ia32_reduce_fadd_ph512:
15947 case X86::BI__builtin_ia32_reduce_fadd_ph256:
15948 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
15949 Function *F =
15950 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15951 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15952 Builder.getFastMathFlags().setAllowReassoc();
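// llvm.vector.reduce.fadd is lowered as a strictly ordered reduction
// unless the call carries the 'reassoc' fast-math flag, so set it here
// to allow the relaxed (tree) reduction these builtins expect.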
15953 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15954 }
15955 case X86::BI__builtin_ia32_reduce_fmul_pd512:
15956 case X86::BI__builtin_ia32_reduce_fmul_ps512:
15957 case X86::BI__builtin_ia32_reduce_fmul_ph512:
15958 case X86::BI__builtin_ia32_reduce_fmul_ph256:
15959 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15960 Function *F =
15961 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15962 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15963 Builder.getFastMathFlags().setAllowReassoc();
15964 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15965 }
15966 case X86::BI__builtin_ia32_reduce_fmax_pd512:
15967 case X86::BI__builtin_ia32_reduce_fmax_ps512:
15968 case X86::BI__builtin_ia32_reduce_fmax_ph512:
15969 case X86::BI__builtin_ia32_reduce_fmax_ph256:
15970 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15971 Function *F =
15972 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15973 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15974 Builder.getFastMathFlags().setNoNaNs();
15975 return Builder.CreateCall(F, {Ops[0]});
15976 }
15977 case X86::BI__builtin_ia32_reduce_fmin_pd512:
15978 case X86::BI__builtin_ia32_reduce_fmin_ps512:
15979 case X86::BI__builtin_ia32_reduce_fmin_ph512:
15980 case X86::BI__builtin_ia32_reduce_fmin_ph256:
15981 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15982 Function *F =
15983 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15984 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15985 Builder.getFastMathFlags().setNoNaNs();
15986 return Builder.CreateCall(F, {Ops[0]});
15987 }
15988
15989 // 3DNow!
15990 case X86::BI__builtin_ia32_pswapdsf:
15991 case X86::BI__builtin_ia32_pswapdsi: {
15992 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
15993 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
15994 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15995 return Builder.CreateCall(F, Ops, "pswapd");
15996 }
15997 case X86::BI__builtin_ia32_rdrand16_step:
15998 case X86::BI__builtin_ia32_rdrand32_step:
15999 case X86::BI__builtin_ia32_rdrand64_step:
16000 case X86::BI__builtin_ia32_rdseed16_step:
16001 case X86::BI__builtin_ia32_rdseed32_step:
16002 case X86::BI__builtin_ia32_rdseed64_step: {
16003 Intrinsic::ID ID;
16004 switch (BuiltinID) {
16005 default: llvm_unreachable("Unsupported intrinsic!");
16006 case X86::BI__builtin_ia32_rdrand16_step:
16007 ID = Intrinsic::x86_rdrand_16;
16008 break;
16009 case X86::BI__builtin_ia32_rdrand32_step:
16010 ID = Intrinsic::x86_rdrand_32;
16011 break;
16012 case X86::BI__builtin_ia32_rdrand64_step:
16013 ID = Intrinsic::x86_rdrand_64;
16014 break;
16015 case X86::BI__builtin_ia32_rdseed16_step:
16016 ID = Intrinsic::x86_rdseed_16;
16017 break;
16018 case X86::BI__builtin_ia32_rdseed32_step:
16019 ID = Intrinsic::x86_rdseed_32;
16020 break;
16021 case X86::BI__builtin_ia32_rdseed64_step:
16022 ID = Intrinsic::x86_rdseed_64;
16023 break;
16024 }
16025
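// The rdrand/rdseed intrinsics return { iN value, i32 success }; store
// the value through the pointer operand and return the success flag.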
16026 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16027 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16028 Ops[0]);
16029 return Builder.CreateExtractValue(Call, 1);
16030 }
16031 case X86::BI__builtin_ia32_addcarryx_u32:
16032 case X86::BI__builtin_ia32_addcarryx_u64:
16033 case X86::BI__builtin_ia32_subborrow_u32:
16034 case X86::BI__builtin_ia32_subborrow_u64: {
16035 Intrinsic::ID IID;
16036 switch (BuiltinID) {
16037 default: llvm_unreachable("Unsupported intrinsic!");
16038 case X86::BI__builtin_ia32_addcarryx_u32:
16039 IID = Intrinsic::x86_addcarry_32;
16040 break;
16041 case X86::BI__builtin_ia32_addcarryx_u64:
16042 IID = Intrinsic::x86_addcarry_64;
16043 break;
16044 case X86::BI__builtin_ia32_subborrow_u32:
16045 IID = Intrinsic::x86_subborrow_32;
16046 break;
16047 case X86::BI__builtin_ia32_subborrow_u64:
16048 IID = Intrinsic::x86_subborrow_64;
16049 break;
16050 }
16051
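// The addcarry/subborrow intrinsics return { i8 carry-out, iN result };
// store the result through Ops[3] and return the carry-out.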
16052 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16053 { Ops[0], Ops[1], Ops[2] });
16054 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16055 Ops[3]);
16056 return Builder.CreateExtractValue(Call, 0);
16057 }
16058
16059 case X86::BI__builtin_ia32_fpclassps128_mask:
16060 case X86::BI__builtin_ia32_fpclassps256_mask:
16061 case X86::BI__builtin_ia32_fpclassps512_mask:
16062 case X86::BI__builtin_ia32_fpclassph128_mask:
16063 case X86::BI__builtin_ia32_fpclassph256_mask:
16064 case X86::BI__builtin_ia32_fpclassph512_mask:
16065 case X86::BI__builtin_ia32_fpclasspd128_mask:
16066 case X86::BI__builtin_ia32_fpclasspd256_mask:
16067 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16068 unsigned NumElts =
16069 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16070 Value *MaskIn = Ops[2];
16071 Ops.erase(&Ops[2]);
16072
16073 Intrinsic::ID ID;
16074 switch (BuiltinID) {
16075 default: llvm_unreachable("Unsupported intrinsic!");
16076 case X86::BI__builtin_ia32_fpclassph128_mask:
16077 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16078 break;
16079 case X86::BI__builtin_ia32_fpclassph256_mask:
16080 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16081 break;
16082 case X86::BI__builtin_ia32_fpclassph512_mask:
16083 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16084 break;
16085 case X86::BI__builtin_ia32_fpclassps128_mask:
16086 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16087 break;
16088 case X86::BI__builtin_ia32_fpclassps256_mask:
16089 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16090 break;
16091 case X86::BI__builtin_ia32_fpclassps512_mask:
16092 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16093 break;
16094 case X86::BI__builtin_ia32_fpclasspd128_mask:
16095 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16096 break;
16097 case X86::BI__builtin_ia32_fpclasspd256_mask:
16098 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16099 break;
16100 case X86::BI__builtin_ia32_fpclasspd512_mask:
16101 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16102 break;
16103 }
16104
16105 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16106 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16107 }
16108
16109 case X86::BI__builtin_ia32_vp2intersect_q_512:
16110 case X86::BI__builtin_ia32_vp2intersect_q_256:
16111 case X86::BI__builtin_ia32_vp2intersect_q_128:
16112 case X86::BI__builtin_ia32_vp2intersect_d_512:
16113 case X86::BI__builtin_ia32_vp2intersect_d_256:
16114 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16115 unsigned NumElts =
16116 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16117 Intrinsic::ID ID;
16118
16119 switch (BuiltinID) {
16120 default: llvm_unreachable("Unsupported intrinsic!");
16121 case X86::BI__builtin_ia32_vp2intersect_q_512:
16122 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16123 break;
16124 case X86::BI__builtin_ia32_vp2intersect_q_256:
16125 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16126 break;
16127 case X86::BI__builtin_ia32_vp2intersect_q_128:
16128 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16129 break;
16130 case X86::BI__builtin_ia32_vp2intersect_d_512:
16131 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16132 break;
16133 case X86::BI__builtin_ia32_vp2intersect_d_256:
16134 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16135 break;
16136 case X86::BI__builtin_ia32_vp2intersect_d_128:
16137 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16138 break;
16139 }
16140
16141 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16142 Value *Result = Builder.CreateExtractValue(Call, 0);
16143 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16144 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16145
16146 Result = Builder.CreateExtractValue(Call, 1);
16147 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16148 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16149 }
16150
16151 case X86::BI__builtin_ia32_vpmultishiftqb128:
16152 case X86::BI__builtin_ia32_vpmultishiftqb256:
16153 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16154 Intrinsic::ID ID;
16155 switch (BuiltinID) {
16156 default: llvm_unreachable("Unsupported intrinsic!");
16157 case X86::BI__builtin_ia32_vpmultishiftqb128:
16158 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16159 break;
16160 case X86::BI__builtin_ia32_vpmultishiftqb256:
16161 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16162 break;
16163 case X86::BI__builtin_ia32_vpmultishiftqb512:
16164 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16165 break;
16166 }
16167
16168 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16169 }
16170
16171 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16172 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16173 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16174 unsigned NumElts =
16175 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16176 Value *MaskIn = Ops[2];
16177 Ops.erase(&Ops[2]);
16178
16179 Intrinsic::ID ID;
16180 switch (BuiltinID) {
16181 default: llvm_unreachable("Unsupported intrinsic!");
16182 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16183 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16184 break;
16185 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16186 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16187 break;
16188 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16189 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16190 break;
16191 }
16192
16193 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16194 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16195 }
16196
16197 // packed comparison intrinsics
16198 case X86::BI__builtin_ia32_cmpeqps:
16199 case X86::BI__builtin_ia32_cmpeqpd:
16200 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16201 case X86::BI__builtin_ia32_cmpltps:
16202 case X86::BI__builtin_ia32_cmpltpd:
16203 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16204 case X86::BI__builtin_ia32_cmpleps:
16205 case X86::BI__builtin_ia32_cmplepd:
16206 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16207 case X86::BI__builtin_ia32_cmpunordps:
16208 case X86::BI__builtin_ia32_cmpunordpd:
16209 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16210 case X86::BI__builtin_ia32_cmpneqps:
16211 case X86::BI__builtin_ia32_cmpneqpd:
16212 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16213 case X86::BI__builtin_ia32_cmpnltps:
16214 case X86::BI__builtin_ia32_cmpnltpd:
16215 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16216 case X86::BI__builtin_ia32_cmpnleps:
16217 case X86::BI__builtin_ia32_cmpnlepd:
16218 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16219 case X86::BI__builtin_ia32_cmpordps:
16220 case X86::BI__builtin_ia32_cmpordpd:
16221 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16222 case X86::BI__builtin_ia32_cmpph128_mask:
16223 case X86::BI__builtin_ia32_cmpph256_mask:
16224 case X86::BI__builtin_ia32_cmpph512_mask:
16225 case X86::BI__builtin_ia32_cmpps128_mask:
16226 case X86::BI__builtin_ia32_cmpps256_mask:
16227 case X86::BI__builtin_ia32_cmpps512_mask:
16228 case X86::BI__builtin_ia32_cmppd128_mask:
16229 case X86::BI__builtin_ia32_cmppd256_mask:
16230 case X86::BI__builtin_ia32_cmppd512_mask:
16231 IsMaskFCmp = true;
16232 [[fallthrough]];
16233 case X86::BI__builtin_ia32_cmpps:
16234 case X86::BI__builtin_ia32_cmpps256:
16235 case X86::BI__builtin_ia32_cmppd:
16236 case X86::BI__builtin_ia32_cmppd256: {
16237 // Lower vector comparisons to fcmp instructions, ignoring both the
16238 // requested signalling behaviour and the requested rounding mode.
16239 // This is only possible if the fp-model is not strict and FENV_ACCESS
16240 // is off.
16241
16242 // The third argument is the comparison condition, an integer in the
16243 // range [0, 31].
16244 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16245
16246 // Lowering to IR fcmp instruction.
16247 // Ignoring requested signaling behaviour,
16248 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16249 FCmpInst::Predicate Pred;
16250 bool IsSignaling;
16251 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16252 // behavior is inverted. We'll handle that after the switch.
16253 switch (CC & 0xf) {
16254 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16255 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16256 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16257 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16258 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16259 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16260 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16261 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16262 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16263 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16264 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16265 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16266 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16267 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16268 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16269 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16270 default: llvm_unreachable("Unhandled CC");
16271 }
16272
16273 // Invert the signalling behavior for 16-31.
16274 if (CC & 0x10)
16275 IsSignaling = !IsSignaling;
16276
16277 // If the predicate is true or false and we're using constrained intrinsics,
16278 // we don't have a compare intrinsic we can use. Just use the legacy X86
16279 // specific intrinsic.
16280 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16281 // use the legacy X86 specific intrinsic.
16282 if (Builder.getIsFPConstrained() &&
16283 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16284 IsMaskFCmp)) {
16285
16286 Intrinsic::ID IID;
16287 switch (BuiltinID) {
16288 default: llvm_unreachable("Unexpected builtin");
16289 case X86::BI__builtin_ia32_cmpps:
16290 IID = Intrinsic::x86_sse_cmp_ps;
16291 break;
16292 case X86::BI__builtin_ia32_cmpps256:
16293 IID = Intrinsic::x86_avx_cmp_ps_256;
16294 break;
16295 case X86::BI__builtin_ia32_cmppd:
16296 IID = Intrinsic::x86_sse2_cmp_pd;
16297 break;
16298 case X86::BI__builtin_ia32_cmppd256:
16299 IID = Intrinsic::x86_avx_cmp_pd_256;
16300 break;
16301 case X86::BI__builtin_ia32_cmpph128_mask:
16302 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16303 break;
16304 case X86::BI__builtin_ia32_cmpph256_mask:
16305 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16306 break;
16307 case X86::BI__builtin_ia32_cmpph512_mask:
16308 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16309 break;
16310 case X86::BI__builtin_ia32_cmpps512_mask:
16311 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16312 break;
16313 case X86::BI__builtin_ia32_cmppd512_mask:
16314 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16315 break;
16316 case X86::BI__builtin_ia32_cmpps128_mask:
16317 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16318 break;
16319 case X86::BI__builtin_ia32_cmpps256_mask:
16320 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16321 break;
16322 case X86::BI__builtin_ia32_cmppd128_mask:
16323 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16324 break;
16325 case X86::BI__builtin_ia32_cmppd256_mask:
16326 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16327 break;
16328 }
16329
16330 Function *Intr = CGM.getIntrinsic(IID);
16331 if (IsMaskFCmp) {
16332 unsigned NumElts =
16333 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16334 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16335 Value *Cmp = Builder.CreateCall(Intr, Ops);
16336 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16337 }
16338
16339 return Builder.CreateCall(Intr, Ops);
16340 }
16341
16342 // Builtins without the _mask suffix return a vector of integers
16343 // of the same width as the input vectors
16344 if (IsMaskFCmp) {
16345 // We ignore SAE if strict FP is disabled. We only keep precise
16346 // exception behavior under strict FP.
16347 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16348 // object will be required.
16349 unsigned NumElts =
16350 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16351 Value *Cmp;
16352 if (IsSignaling)
16353 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16354 else
16355 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16356 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16357 }
16358
16359 return getVectorFCmpIR(Pred, IsSignaling);
16360 }
16361
16362 // SSE scalar comparison intrinsics
16363 case X86::BI__builtin_ia32_cmpeqss:
16364 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16365 case X86::BI__builtin_ia32_cmpltss:
16366 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16367 case X86::BI__builtin_ia32_cmpless:
16368 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16369 case X86::BI__builtin_ia32_cmpunordss:
16370 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16371 case X86::BI__builtin_ia32_cmpneqss:
16372 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16373 case X86::BI__builtin_ia32_cmpnltss:
16374 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16375 case X86::BI__builtin_ia32_cmpnless:
16376 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16377 case X86::BI__builtin_ia32_cmpordss:
16378 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16379 case X86::BI__builtin_ia32_cmpeqsd:
16380 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16381 case X86::BI__builtin_ia32_cmpltsd:
16382 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16383 case X86::BI__builtin_ia32_cmplesd:
16384 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16385 case X86::BI__builtin_ia32_cmpunordsd:
16386 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16387 case X86::BI__builtin_ia32_cmpneqsd:
16388 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16389 case X86::BI__builtin_ia32_cmpnltsd:
16390 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16391 case X86::BI__builtin_ia32_cmpnlesd:
16392 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16393 case X86::BI__builtin_ia32_cmpordsd:
16394 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16395
16396 // f16c half2float intrinsics
16397 case X86::BI__builtin_ia32_vcvtph2ps:
16398 case X86::BI__builtin_ia32_vcvtph2ps256:
16399 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16400 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16401 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16402 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16403 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16404 }
16405
16406 // AVX512 bf16 intrinsics
16407 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16408 Ops[2] = getMaskVecValue(
16409 *this, Ops[2],
16410 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16411 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16412 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16413 }
16414 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16415 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16416
16417 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16418 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16419 Intrinsic::ID IID;
16420 switch (BuiltinID) {
16421 default: llvm_unreachable("Unsupported intrinsic!");
16422 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16423 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16424 break;
16425 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16426 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16427 break;
16428 }
16429 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16430 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16431 }
16432
16433 case X86::BI__cpuid:
16434 case X86::BI__cpuidex: {
16435 Value *FuncId = EmitScalarExpr(E->getArg(1));
16436 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16437 ? EmitScalarExpr(E->getArg(2))
16438 : llvm::ConstantInt::get(Int32Ty, 0);
16439
16440 llvm::StructType *CpuidRetTy =
16441 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16442 llvm::FunctionType *FTy =
16443 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16444
16445 StringRef Asm, Constraints;
16446 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16447 Asm = "cpuid";
16448 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16449 } else {
16450 // x86-64 uses %rbx as the base register, so preserve it.
16451 Asm = "xchgq %rbx, ${1:q}\n"
16452 "cpuid\n"
16453 "xchgq %rbx, ${1:q}";
16454 Constraints = "={ax},=r,={cx},={dx},0,2";
16455 }
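// In both variants the constraints pass FuncId in EAX and SubFuncId in
// ECX and return EAX/EBX/ECX/EDX; on x86-64 the EBX value is routed
// through a scratch register because RBX is preserved by the xchg pair.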
16456
16457 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16458 /*hasSideEffects=*/false);
16459 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16460 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16461 Value *Store = nullptr;
16462 for (unsigned i = 0; i < 4; i++) {
16463 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16464 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16465 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16466 }
16467
16468 // Return the last store instruction to signal that we have emitted
16469 // the intrinsic.
16470 return Store;
16471 }
16472
16473 case X86::BI__emul:
16474 case X86::BI__emulu: {
16475 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16476 bool isSigned = (BuiltinID == X86::BI__emul);
16477 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16478 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16479 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16480 }
16481 case X86::BI__mulh:
16482 case X86::BI__umulh:
16483 case X86::BI_mul128:
16484 case X86::BI_umul128: {
16485 llvm::Type *ResType = ConvertType(E->getType());
16486 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16487
16488 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16489 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16490 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16491
16492 Value *MulResult, *HigherBits;
16493 if (IsSigned) {
16494 MulResult = Builder.CreateNSWMul(LHS, RHS);
16495 HigherBits = Builder.CreateAShr(MulResult, 64);
16496 } else {
16497 MulResult = Builder.CreateNUWMul(LHS, RHS);
16498 HigherBits = Builder.CreateLShr(MulResult, 64);
16499 }
16500 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16501
16502 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16503 return HigherBits;
16504
16505 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16506 Builder.CreateStore(HigherBits, HighBitsAddress);
16507 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16508 }
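// The lowering above is equivalent to this portable sketch (illustrative
// only; unsigned __int128 is a GCC/Clang extension, whereas the codegen above
// builds the i128 IR directly):
//
//   unsigned long long umulh(unsigned long long A, unsigned long long B) {
//     unsigned __int128 P = (unsigned __int128)A * B;  // widened NUW multiply
//     return (unsigned long long)(P >> 64);            // high 64 bits
//   }
//
// _mul128/_umul128 additionally store the high 64 bits through the third
// argument and return the low 64 bits, matching the CreateStore above.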
16509
16510 case X86::BI__faststorefence: {
16511 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16512 llvm::SyncScope::System);
16513 }
16514 case X86::BI__shiftleft128:
16515 case X86::BI__shiftright128: {
16516 llvm::Function *F = CGM.getIntrinsic(
16517 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16518 Int64Ty);
16519 // Flip low/high ops and zero-extend amount to matching type.
16520 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16521 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16522 std::swap(Ops[0], Ops[1]);
16523 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16524 return Builder.CreateCall(F, Ops);
16525 }
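// Portable sketch of the funnel-shift mapping above (illustrative only; the
// amount is masked to 0-63, which is also what fshl/fshr do for i64):
//
//   unsigned long long shiftleft128(unsigned long long Low,
//                                   unsigned long long High,
//                                   unsigned char Amt) {
//     Amt &= 63;
//     return Amt ? (High << Amt) | (Low >> (64 - Amt)) : High;
//   }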
16526 case X86::BI_ReadWriteBarrier:
16527 case X86::BI_ReadBarrier:
16528 case X86::BI_WriteBarrier: {
16529 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16530 llvm::SyncScope::SingleThread);
16531 }
16532
16533 case X86::BI_AddressOfReturnAddress: {
16534 Function *F =
16535 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16536 return Builder.CreateCall(F);
16537 }
16538 case X86::BI__stosb: {
16539 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16540 // instruction, but it will create a memset that won't be optimized away.
16541 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16542 }
16543 case X86::BI__ud2:
16544 // llvm.trap makes a ud2a instruction on x86.
16545 return EmitTrapCall(Intrinsic::trap);
16546 case X86::BI__int2c: {
16547 // This interrupt signals a driver assertion failure in x86 NT kernels.
16548 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16549 llvm::InlineAsm *IA =
16550 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16551 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16552 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16553 llvm::Attribute::NoReturn);
16554 llvm::CallInst *CI = Builder.CreateCall(IA);
16555 CI->setAttributes(NoReturnAttr);
16556 return CI;
16557 }
16558 case X86::BI__readfsbyte:
16559 case X86::BI__readfsword:
16560 case X86::BI__readfsdword:
16561 case X86::BI__readfsqword: {
16562 llvm::Type *IntTy = ConvertType(E->getType());
16563 Value *Ptr = Builder.CreateIntToPtr(
16564 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16565 LoadInst *Load = Builder.CreateAlignedLoad(
16566 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16567 Load->setVolatile(true);
16568 return Load;
16569 }
16570 case X86::BI__readgsbyte:
16571 case X86::BI__readgsword:
16572 case X86::BI__readgsdword:
16573 case X86::BI__readgsqword: {
16574 llvm::Type *IntTy = ConvertType(E->getType());
16575 Value *Ptr = Builder.CreateIntToPtr(
16576 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16577 LoadInst *Load = Builder.CreateAlignedLoad(
16578 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16579 Load->setVolatile(true);
16580 return Load;
16581 }
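// Note: address spaces 257 and 256 are the x86 backend's conventions for the
// %fs and %gs segments respectively, so the volatile loads above select as
// segment-relative moves (e.g. "mov %gs:(%reg), ...") rather than calls.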
16582 case X86::BI__builtin_ia32_encodekey128_u32: {
16583 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16584
16585 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16586
16587 for (int i = 0; i < 3; ++i) {
16588 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16589 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16590 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16591 }
16592
16593 return Builder.CreateExtractValue(Call, 0);
16594 }
16595 case X86::BI__builtin_ia32_encodekey256_u32: {
16596 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16597
16598 Value *Call =
16599 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16600
16601 for (int i = 0; i < 4; ++i) {
16602 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16603 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16604 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16605 }
16606
16607 return Builder.CreateExtractValue(Call, 0);
16608 }
16609 case X86::BI__builtin_ia32_aesenc128kl_u8:
16610 case X86::BI__builtin_ia32_aesdec128kl_u8:
16611 case X86::BI__builtin_ia32_aesenc256kl_u8:
16612 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16613 Intrinsic::ID IID;
16614 StringRef BlockName;
16615 switch (BuiltinID) {
16616 default:
16617 llvm_unreachable("Unexpected builtin");
16618 case X86::BI__builtin_ia32_aesenc128kl_u8:
16619 IID = Intrinsic::x86_aesenc128kl;
16620 BlockName = "aesenc128kl";
16621 break;
16622 case X86::BI__builtin_ia32_aesdec128kl_u8:
16623 IID = Intrinsic::x86_aesdec128kl;
16624 BlockName = "aesdec128kl";
16625 break;
16626 case X86::BI__builtin_ia32_aesenc256kl_u8:
16627 IID = Intrinsic::x86_aesenc256kl;
16628 BlockName = "aesenc256kl";
16629 break;
16630 case X86::BI__builtin_ia32_aesdec256kl_u8:
16631 IID = Intrinsic::x86_aesdec256kl;
16632 BlockName = "aesdec256kl";
16633 break;
16634 }
16635
16636 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16637
16638 BasicBlock *NoError =
16639 createBasicBlock(BlockName + "_no_error", this->CurFn);
16640 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16641 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16642
16643 Value *Ret = Builder.CreateExtractValue(Call, 0);
16644 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16645 Value *Out = Builder.CreateExtractValue(Call, 1);
16646 Builder.CreateCondBr(Succ, NoError, Error);
16647
16648 Builder.SetInsertPoint(NoError);
16649 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16650 Builder.CreateBr(End);
16651
16652 Builder.SetInsertPoint(Error);
16653 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16654 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16655 Builder.CreateBr(End);
16656
16657 Builder.SetInsertPoint(End);
16658 return Builder.CreateExtractValue(Call, 0);
16659 }
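// Hedged usage sketch of the control flow emitted above (illustrative only;
// Data is a __m128i block and Handle points to a Key Locker key handle blob):
//
//   __m128i Out;
//   if (__builtin_ia32_aesenc128kl_u8(&Out, Data, Handle)) {
//     // success: Out holds the encrypted block
//   } else {
//     // failure: the error block above stored zero to Out
//   }
//
// The branch treats a nonzero low bit of the returned status as success.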
16660 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16661 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16662 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16663 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16664 Intrinsic::ID IID;
16665 StringRef BlockName;
16666 switch (BuiltinID) {
16667 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16668 IID = Intrinsic::x86_aesencwide128kl;
16669 BlockName = "aesencwide128kl";
16670 break;
16671 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16672 IID = Intrinsic::x86_aesdecwide128kl;
16673 BlockName = "aesdecwide128kl";
16674 break;
16675 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16676 IID = Intrinsic::x86_aesencwide256kl;
16677 BlockName = "aesencwide256kl";
16678 break;
16679 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16680 IID = Intrinsic::x86_aesdecwide256kl;
16681 BlockName = "aesdecwide256kl";
16682 break;
16683 }
16684
16685 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16686 Value *InOps[9];
16687 InOps[0] = Ops[2];
16688 for (int i = 0; i != 8; ++i) {
16689 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16690 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16691 }
16692
16693 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16694
16695 BasicBlock *NoError =
16696 createBasicBlock(BlockName + "_no_error", this->CurFn);
16697 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16698 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16699
16700 Value *Ret = Builder.CreateExtractValue(Call, 0);
16701 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16702 Builder.CreateCondBr(Succ, NoError, Error);
16703
16704 Builder.SetInsertPoint(NoError);
16705 for (int i = 0; i != 8; ++i) {
16706 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16707 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16708 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16709 }
16710 Builder.CreateBr(End);
16711
16712 Builder.SetInsertPoint(Error);
16713 for (int i = 0; i != 8; ++i) {
16714 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16715 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16716 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16717 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16718 }
16719 Builder.CreateBr(End);
16720
16721 Builder.SetInsertPoint(End);
16722 return Builder.CreateExtractValue(Call, 0);
16723 }
16724 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16725 IsConjFMA = true;
16726 [[fallthrough]];
16727 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16728 Intrinsic::ID IID = IsConjFMA
16729 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16730 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16731 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16732 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16733 }
16734 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16735 IsConjFMA = true;
16736 [[fallthrough]];
16737 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16738 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16739 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16740 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16741 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16742 return EmitX86Select(*this, And, Call, Ops[0]);
16743 }
16744 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16745 IsConjFMA = true;
16746 [[fallthrough]];
16747 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16748 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16749 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16750 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16751 static constexpr int Mask[] = {0, 5, 6, 7};
16752 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16753 }
16754 case X86::BI__builtin_ia32_prefetchi:
16755 return Builder.CreateCall(
16756 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16757 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16758 llvm::ConstantInt::get(Int32Ty, 0)});
16759 }
16760}
16761
16762Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16763 const CallExpr *E) {
16764 // Do not emit the builtin arguments directly as arguments of a function
16765 // call, because the evaluation order of function arguments is not specified
16766 // in C++. This is important when testing, to ensure the arguments are
16767 // emitted in the same order every time. E.g.:
16768 // Instead of:
16769 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16770 // EmitScalarExpr(E->getArg(1)), "swdiv");
16771 // Use:
16772 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16773 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16774 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16775
16776 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16777
16778#include "llvm/TargetParser/PPCTargetParser.def"
16779 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16780 unsigned CompOp,
16781 unsigned OpValue) -> Value * {
16782 if (SupportMethod == AIX_BUILTIN_PPC_FALSE)
16783 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16784
16785 if (SupportMethod == AIX_BUILTIN_PPC_TRUE)
16786 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16787
16788 assert(SupportMethod <= USE_SYS_CONF && "Invalid value for SupportMethod.");
16789 assert((CompOp == COMP_EQ) && "Only equal comparisons are supported.");
16790
16791 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16792 llvm::Constant *SysConf =
16793 CGM.CreateRuntimeVariable(STy, "_system_configuration");
16794
16795 // Grab the appropriate field from _system_configuration.
16796 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
16797 ConstantInt::get(Int32Ty, FieldIdx)};
16798
16799 llvm::Value *FieldValue = Builder.CreateGEP(STy, SysConf, Idxs);
16800 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
16801 CharUnits::fromQuantity(4));
16802 assert(FieldValue->getType()->isIntegerTy(32) &&
16803 "Only 32-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16804 return Builder.CreateICmp(ICmpInst::ICMP_EQ, FieldValue,
16805 ConstantInt::get(Int32Ty, OpValue));
16806 };
16807
16808 switch (BuiltinID) {
16809 default: return nullptr;
16810
16811 case Builtin::BI__builtin_cpu_is: {
16812 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16813 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16814 llvm::Triple Triple = getTarget().getTriple();
16815
16816 if (Triple.isOSAIX()) {
16817 unsigned IsCpuSupport, FieldIdx, CompareOp, CpuIdValue;
16818 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUType;
16819 std::tie(IsCpuSupport, FieldIdx, CompareOp, CpuIdValue) =
16820 static_cast<CPUType>(StringSwitch<CPUType>(CPUStr)
16821#define PPC_AIX_CPU(NAME, SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE) \
16822 .Case(NAME, {SUPPORT_MAGIC, INDEX, COMPARE_OP, VALUE})
16823#include "llvm/TargetParser/PPCTargetParser.def"
16824 );
16825 return GenAIXPPCBuiltinCpuExpr(IsCpuSupport, FieldIdx, CompareOp,
16826 CpuIdValue);
16827 }
16828
16829 assert(Triple.isOSLinux() &&
16830 "__builtin_cpu_is() is only supported for AIX and Linux.");
16831 unsigned NumCPUID = StringSwitch<unsigned>(CPUStr)
16832#define PPC_LNX_CPU(Name, NumericID) .Case(Name, NumericID)
16833#include "llvm/TargetParser/PPCTargetParser.def"
16834 .Default(-1U);
16835 assert(NumCPUID < -1U && "Invalid CPU name. Missed by SemaChecking?");
16836 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
16837 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16838 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
16839 return Builder.CreateICmpEQ(TheCall,
16840 llvm::ConstantInt::get(Int32Ty, NumCPUID));
16841 }
16842 case Builtin::BI__builtin_cpu_supports: {
16843 unsigned FeatureWord;
16844 unsigned BitMask;
16845 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16846 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16847 std::tie(FeatureWord, BitMask) =
16848 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
16849#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
16850 .Case(Name, {FA_WORD, Bitmask})
16851#include "llvm/TargetParser/PPCTargetParser.def"
16852 .Default({0, 0});
16853 if (!BitMask)
16854 return Builder.getFalse();
16855 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
16856 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16857 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
16858 Value *Mask =
16859 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
16860 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
16861#undef PPC_FAWORD_HWCAP
16862#undef PPC_FAWORD_HWCAP2
16863#undef PPC_FAWORD_CPUID
16864 }
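// Hedged usage sketch (the feature string must be one of the names listed in
// PPCTargetParser.def; "mma" is used here only as an illustration):
//
//   if (__builtin_cpu_supports("mma"))
//     use_mma_path();
//   else
//     use_generic_path();
//
// On Linux this compiles to a load of the fixed hwcap word via
// llvm.ppc.fixed.addr.ld followed by the mask-and-compare emitted above.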
16865
16866 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16867 // call __builtin_readcyclecounter.
16868 case PPC::BI__builtin_ppc_get_timebase:
16869 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16870
16871 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16872 case PPC::BI__builtin_altivec_lvx:
16873 case PPC::BI__builtin_altivec_lvxl:
16874 case PPC::BI__builtin_altivec_lvebx:
16875 case PPC::BI__builtin_altivec_lvehx:
16876 case PPC::BI__builtin_altivec_lvewx:
16877 case PPC::BI__builtin_altivec_lvsl:
16878 case PPC::BI__builtin_altivec_lvsr:
16879 case PPC::BI__builtin_vsx_lxvd2x:
16880 case PPC::BI__builtin_vsx_lxvw4x:
16881 case PPC::BI__builtin_vsx_lxvd2x_be:
16882 case PPC::BI__builtin_vsx_lxvw4x_be:
16883 case PPC::BI__builtin_vsx_lxvl:
16884 case PPC::BI__builtin_vsx_lxvll:
16885 {
16886 SmallVector<Value *, 2> Ops;
16887 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16888 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16889 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16890 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16891 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16892 Ops.pop_back();
16893 }
16894
16895 switch (BuiltinID) {
16896 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16897 case PPC::BI__builtin_altivec_lvx:
16898 ID = Intrinsic::ppc_altivec_lvx;
16899 break;
16900 case PPC::BI__builtin_altivec_lvxl:
16901 ID = Intrinsic::ppc_altivec_lvxl;
16902 break;
16903 case PPC::BI__builtin_altivec_lvebx:
16904 ID = Intrinsic::ppc_altivec_lvebx;
16905 break;
16906 case PPC::BI__builtin_altivec_lvehx:
16907 ID = Intrinsic::ppc_altivec_lvehx;
16908 break;
16909 case PPC::BI__builtin_altivec_lvewx:
16910 ID = Intrinsic::ppc_altivec_lvewx;
16911 break;
16912 case PPC::BI__builtin_altivec_lvsl:
16913 ID = Intrinsic::ppc_altivec_lvsl;
16914 break;
16915 case PPC::BI__builtin_altivec_lvsr:
16916 ID = Intrinsic::ppc_altivec_lvsr;
16917 break;
16918 case PPC::BI__builtin_vsx_lxvd2x:
16919 ID = Intrinsic::ppc_vsx_lxvd2x;
16920 break;
16921 case PPC::BI__builtin_vsx_lxvw4x:
16922 ID = Intrinsic::ppc_vsx_lxvw4x;
16923 break;
16924 case PPC::BI__builtin_vsx_lxvd2x_be:
16925 ID = Intrinsic::ppc_vsx_lxvd2x_be;
16926 break;
16927 case PPC::BI__builtin_vsx_lxvw4x_be:
16928 ID = Intrinsic::ppc_vsx_lxvw4x_be;
16929 break;
16930 case PPC::BI__builtin_vsx_lxvl:
16931 ID = Intrinsic::ppc_vsx_lxvl;
16932 break;
16933 case PPC::BI__builtin_vsx_lxvll:
16934 ID = Intrinsic::ppc_vsx_lxvll;
16935 break;
16936 }
16937 llvm::Function *F = CGM.getIntrinsic(ID);
16938 return Builder.CreateCall(F, Ops, "");
16939 }
16940
16941 // vec_st, vec_xst_be
16942 case PPC::BI__builtin_altivec_stvx:
16943 case PPC::BI__builtin_altivec_stvxl:
16944 case PPC::BI__builtin_altivec_stvebx:
16945 case PPC::BI__builtin_altivec_stvehx:
16946 case PPC::BI__builtin_altivec_stvewx:
16947 case PPC::BI__builtin_vsx_stxvd2x:
16948 case PPC::BI__builtin_vsx_stxvw4x:
16949 case PPC::BI__builtin_vsx_stxvd2x_be:
16950 case PPC::BI__builtin_vsx_stxvw4x_be:
16951 case PPC::BI__builtin_vsx_stxvl:
16952 case PPC::BI__builtin_vsx_stxvll:
16953 {
16954 SmallVector<Value *, 3> Ops;
16955 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16956 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16957 Ops.push_back(EmitScalarExpr(E->getArg(2)));
16958 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16959 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
16960 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16961 Ops.pop_back();
16962 }
16963
16964 switch (BuiltinID) {
16965 default: llvm_unreachable("Unsupported st intrinsic!");
16966 case PPC::BI__builtin_altivec_stvx:
16967 ID = Intrinsic::ppc_altivec_stvx;
16968 break;
16969 case PPC::BI__builtin_altivec_stvxl:
16970 ID = Intrinsic::ppc_altivec_stvxl;
16971 break;
16972 case PPC::BI__builtin_altivec_stvebx:
16973 ID = Intrinsic::ppc_altivec_stvebx;
16974 break;
16975 case PPC::BI__builtin_altivec_stvehx:
16976 ID = Intrinsic::ppc_altivec_stvehx;
16977 break;
16978 case PPC::BI__builtin_altivec_stvewx:
16979 ID = Intrinsic::ppc_altivec_stvewx;
16980 break;
16981 case PPC::BI__builtin_vsx_stxvd2x:
16982 ID = Intrinsic::ppc_vsx_stxvd2x;
16983 break;
16984 case PPC::BI__builtin_vsx_stxvw4x:
16985 ID = Intrinsic::ppc_vsx_stxvw4x;
16986 break;
16987 case PPC::BI__builtin_vsx_stxvd2x_be:
16988 ID = Intrinsic::ppc_vsx_stxvd2x_be;
16989 break;
16990 case PPC::BI__builtin_vsx_stxvw4x_be:
16991 ID = Intrinsic::ppc_vsx_stxvw4x_be;
16992 break;
16993 case PPC::BI__builtin_vsx_stxvl:
16994 ID = Intrinsic::ppc_vsx_stxvl;
16995 break;
16996 case PPC::BI__builtin_vsx_stxvll:
16997 ID = Intrinsic::ppc_vsx_stxvll;
16998 break;
16999 }
17000 llvm::Function *F = CGM.getIntrinsic(ID);
17001 return Builder.CreateCall(F, Ops, "");
17002 }
17003 case PPC::BI__builtin_vsx_ldrmb: {
17004 // Essentially boils down to performing an unaligned VMX load sequence so
17005 // as to avoid crossing a page boundary and then shuffling the elements
17006 // into the right side of the vector register.
17007 Value *Op0 = EmitScalarExpr(E->getArg(0));
17008 Value *Op1 = EmitScalarExpr(E->getArg(1));
17009 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17010 llvm::Type *ResTy = ConvertType(E->getType());
17011 bool IsLE = getTarget().isLittleEndian();
17012
17013 // If the user wants the entire vector, just load the entire vector.
17014 if (NumBytes == 16) {
17015 Value *LD =
17016 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17017 if (!IsLE)
17018 return LD;
17019
17020 // Reverse the bytes on LE.
17021 SmallVector<int, 16> RevMask;
17022 for (int Idx = 0; Idx < 16; Idx++)
17023 RevMask.push_back(15 - Idx);
17024 return Builder.CreateShuffleVector(LD, LD, RevMask);
17025 }
17026
17027 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17028 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17029 : Intrinsic::ppc_altivec_lvsl);
17030 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17031 Value *HiMem = Builder.CreateGEP(
17032 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17033 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17034 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17035 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17036
17037 Op0 = IsLE ? HiLd : LoLd;
17038 Op1 = IsLE ? LoLd : HiLd;
17039 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17040 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17041
17042 if (IsLE) {
17043 SmallVector<int, 16> Consts;
17044 for (int Idx = 0; Idx < 16; Idx++) {
17045 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17046 : 16 - (NumBytes - Idx);
17047 Consts.push_back(Val);
17048 }
17049 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17050 Zero, Consts);
17051 }
17052 SmallVector<Constant *, 16> Consts;
17053 for (int Idx = 0; Idx < 16; Idx++)
17054 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17055 Value *Mask2 = ConstantVector::get(Consts);
17056 return Builder.CreateBitCast(
17057 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17058 }
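// Note on the sequence above: lvx ignores the low four address bits, so the
// two loads at Op0 and Op0 + NumBytes - 1 fetch the aligned quadwords that
// cover every requested byte without touching the following page; the
// lvsl/lvsr mask then lets vperm rotate the requested bytes into place.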
17059 case PPC::BI__builtin_vsx_strmb: {
17060 Value *Op0 = EmitScalarExpr(E->getArg(0));
17061 Value *Op1 = EmitScalarExpr(E->getArg(1));
17062 Value *Op2 = EmitScalarExpr(E->getArg(2));
17063 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17064 bool IsLE = getTarget().isLittleEndian();
17065 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17066 // When storing the whole vector, store it directly on BE; on LE, reverse
17067 // the bytes first and then store.
17068 if (Width == 16) {
17069 Value *StVec = Op2;
17070 if (IsLE) {
17071 SmallVector<int, 16> RevMask;
17072 for (int Idx = 0; Idx < 16; Idx++)
17073 RevMask.push_back(15 - Idx);
17074 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17075 }
17076 return Builder.CreateStore(
17077 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17078 }
17079 auto *ConvTy = Int64Ty;
17080 unsigned NumElts = 0;
17081 switch (Width) {
17082 default:
17083 llvm_unreachable("width for stores must be a power of 2");
17084 case 8:
17085 ConvTy = Int64Ty;
17086 NumElts = 2;
17087 break;
17088 case 4:
17089 ConvTy = Int32Ty;
17090 NumElts = 4;
17091 break;
17092 case 2:
17093 ConvTy = Int16Ty;
17094 NumElts = 8;
17095 break;
17096 case 1:
17097 ConvTy = Int8Ty;
17098 NumElts = 16;
17099 break;
17100 }
17101 Value *Vec = Builder.CreateBitCast(
17102 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17103 Value *Ptr =
17104 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17105 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17106 if (IsLE && Width > 1) {
17107 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17108 Elt = Builder.CreateCall(F, Elt);
17109 }
17110 return Builder.CreateStore(
17111 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17112 };
17113 unsigned Stored = 0;
17114 unsigned RemainingBytes = NumBytes;
17115 Value *Result;
17116 if (NumBytes == 16)
17117 return StoreSubVec(16, 0, 0);
17118 if (NumBytes >= 8) {
17119 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17120 RemainingBytes -= 8;
17121 Stored += 8;
17122 }
17123 if (RemainingBytes >= 4) {
17124 Result = StoreSubVec(4, NumBytes - Stored - 4,
17125 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17126 RemainingBytes -= 4;
17127 Stored += 4;
17128 }
17129 if (RemainingBytes >= 2) {
17130 Result = StoreSubVec(2, NumBytes - Stored - 2,
17131 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17132 RemainingBytes -= 2;
17133 Stored += 2;
17134 }
17135 if (RemainingBytes)
17136 Result =
17137 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17138 return Result;
17139 }
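// Worked example of the decomposition above (illustrative): for NumBytes == 11
// StoreSubVec is called with an 8-byte store at offset 3, a 2-byte store at
// offset 1 and a 1-byte store at offset 0, i.e. 8 + 2 + 1 == 11 bytes, with
// per-element byte swaps on little-endian targets.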
17140 // Square root
17141 case PPC::BI__builtin_vsx_xvsqrtsp:
17142 case PPC::BI__builtin_vsx_xvsqrtdp: {
17143 llvm::Type *ResultType = ConvertType(E->getType());
17144 Value *X = EmitScalarExpr(E->getArg(0));
17145 if (Builder.getIsFPConstrained()) {
17146 llvm::Function *F = CGM.getIntrinsic(
17147 Intrinsic::experimental_constrained_sqrt, ResultType);
17148 return Builder.CreateConstrainedFPCall(F, X);
17149 } else {
17150 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17151 return Builder.CreateCall(F, X);
17152 }
17153 }
17154 // Count leading zeros
17155 case PPC::BI__builtin_altivec_vclzb:
17156 case PPC::BI__builtin_altivec_vclzh:
17157 case PPC::BI__builtin_altivec_vclzw:
17158 case PPC::BI__builtin_altivec_vclzd: {
17159 llvm::Type *ResultType = ConvertType(E->getType());
17160 Value *X = EmitScalarExpr(E->getArg(0));
17161 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17162 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17163 return Builder.CreateCall(F, {X, Undef});
17164 }
17165 case PPC::BI__builtin_altivec_vctzb:
17166 case PPC::BI__builtin_altivec_vctzh:
17167 case PPC::BI__builtin_altivec_vctzw:
17168 case PPC::BI__builtin_altivec_vctzd: {
17169 llvm::Type *ResultType = ConvertType(E->getType());
17170 Value *X = EmitScalarExpr(E->getArg(0));
17171 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17172 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17173 return Builder.CreateCall(F, {X, Undef});
17174 }
17175 case PPC::BI__builtin_altivec_vinsd:
17176 case PPC::BI__builtin_altivec_vinsw:
17177 case PPC::BI__builtin_altivec_vinsd_elt:
17178 case PPC::BI__builtin_altivec_vinsw_elt: {
17179 llvm::Type *ResultType = ConvertType(E->getType());
17180 Value *Op0 = EmitScalarExpr(E->getArg(0));
17181 Value *Op1 = EmitScalarExpr(E->getArg(1));
17182 Value *Op2 = EmitScalarExpr(E->getArg(2));
17183
17184 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17185 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17186
17187 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17188 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17189
17190 // The third argument must be a compile time constant.
17191 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17192 assert(ArgCI &&
17193 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17194
17195 // The valid range for the third argument depends on the input type and
17196 // the builtin being called.
17197 int ValidMaxValue = 0;
17198 if (IsUnaligned)
17199 ValidMaxValue = (Is32bit) ? 12 : 8;
17200 else
17201 ValidMaxValue = (Is32bit) ? 3 : 1;
17202
17203 // Get value of third argument.
17204 int64_t ConstArg = ArgCI->getSExtValue();
17205
17206 // Compose range checking error message.
17207 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17208 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17209 RangeErrMsg += " is outside of the valid range [0, ";
17210 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17211
17212 // Issue error if third argument is not within the valid range.
17213 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17214 CGM.Error(E->getExprLoc(), RangeErrMsg);
17215
17216 // The input to vec_replace_elt is an element index; convert it to a byte index.
17217 if (!IsUnaligned) {
17218 ConstArg *= Is32bit ? 4 : 8;
17219 // Fix the constant according to endianness.
17220 if (getTarget().isLittleEndian())
17221 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17222 }
17223
17224 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17225 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17226 // Casting input to vector int as per intrinsic definition.
17227 Op0 =
17228 Is32bit
17229 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17230 : Builder.CreateBitCast(Op0,
17231 llvm::FixedVectorType::get(Int64Ty, 2));
17232 return Builder.CreateBitCast(
17233 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17234 }
17235 case PPC::BI__builtin_altivec_vpopcntb:
17236 case PPC::BI__builtin_altivec_vpopcnth:
17237 case PPC::BI__builtin_altivec_vpopcntw:
17238 case PPC::BI__builtin_altivec_vpopcntd: {
17239 llvm::Type *ResultType = ConvertType(E->getType());
17240 Value *X = EmitScalarExpr(E->getArg(0));
17241 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17242 return Builder.CreateCall(F, X);
17243 }
17244 case PPC::BI__builtin_altivec_vadduqm:
17245 case PPC::BI__builtin_altivec_vsubuqm: {
17246 Value *Op0 = EmitScalarExpr(E->getArg(0));
17247 Value *Op1 = EmitScalarExpr(E->getArg(1));
17248 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17249 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17250 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17251 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17252 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17253 else
17254 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17255 }
17256 case PPC::BI__builtin_altivec_vaddcuq_c:
17257 case PPC::BI__builtin_altivec_vsubcuq_c: {
17258 SmallVector<Value *, 2> Ops;
17259 Value *Op0 = EmitScalarExpr(E->getArg(0));
17260 Value *Op1 = EmitScalarExpr(E->getArg(1));
17261 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17262 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17263 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17264 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17265 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17266 ? Intrinsic::ppc_altivec_vaddcuq
17267 : Intrinsic::ppc_altivec_vsubcuq;
17268 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17269 }
17270 case PPC::BI__builtin_altivec_vaddeuqm_c:
17271 case PPC::BI__builtin_altivec_vaddecuq_c:
17272 case PPC::BI__builtin_altivec_vsubeuqm_c:
17273 case PPC::BI__builtin_altivec_vsubecuq_c: {
17274 SmallVector<Value *, 3> Ops;
17275 Value *Op0 = EmitScalarExpr(E->getArg(0));
17276 Value *Op1 = EmitScalarExpr(E->getArg(1));
17277 Value *Op2 = EmitScalarExpr(E->getArg(2));
17278 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17279 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17280 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17281 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17282 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17283 switch (BuiltinID) {
17284 default:
17285 llvm_unreachable("Unsupported intrinsic!");
17286 case PPC::BI__builtin_altivec_vaddeuqm_c:
17287 ID = Intrinsic::ppc_altivec_vaddeuqm;
17288 break;
17289 case PPC::BI__builtin_altivec_vaddecuq_c:
17290 ID = Intrinsic::ppc_altivec_vaddecuq;
17291 break;
17292 case PPC::BI__builtin_altivec_vsubeuqm_c:
17293 ID = Intrinsic::ppc_altivec_vsubeuqm;
17294 break;
17295 case PPC::BI__builtin_altivec_vsubecuq_c:
17296 ID = Intrinsic::ppc_altivec_vsubecuq;
17297 break;
17298 }
17299 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17300 }
17301 case PPC::BI__builtin_ppc_rldimi:
17302 case PPC::BI__builtin_ppc_rlwimi: {
17303 Value *Op0 = EmitScalarExpr(E->getArg(0));
17304 Value *Op1 = EmitScalarExpr(E->getArg(1));
17305 Value *Op2 = EmitScalarExpr(E->getArg(2));
17306 Value *Op3 = EmitScalarExpr(E->getArg(3));
17307 // rldimi is a 64-bit instruction; expand the intrinsic before isel to
17308 // leverage the peephole optimizer and avoid legalization effort.
17309 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17310 !getTarget().getTriple().isPPC64()) {
17311 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17312 Op2 = Builder.CreateZExt(Op2, Int64Ty);
17313 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17314 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17315 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17316 }
17317 return Builder.CreateCall(
17318 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17319 ? Intrinsic::ppc_rldimi
17320 : Intrinsic::ppc_rlwimi),
17321 {Op0, Op1, Op2, Op3});
17322 }
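// Portable sketch of the expansion used above when rldimi is emitted on a
// 32-bit target (illustrative only; parameter names are not from the source):
//
//   unsigned long long rldimi(unsigned long long RS, unsigned long long RA,
//                             unsigned Shift, unsigned long long Mask) {
//     unsigned long long Rot =
//         (RS << (Shift & 63)) | (RS >> ((64 - Shift) & 63));  // rotl64
//     return (Rot & Mask) | (RA & ~Mask);                      // insert
//   }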
17323 case PPC::BI__builtin_ppc_rlwnm: {
17324 Value *Op0 = EmitScalarExpr(E->getArg(0));
17325 Value *Op1 = EmitScalarExpr(E->getArg(1));
17326 Value *Op2 = EmitScalarExpr(E->getArg(2));
17327 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17328 {Op0, Op1, Op2});
17329 }
17330 case PPC::BI__builtin_ppc_poppar4:
17331 case PPC::BI__builtin_ppc_poppar8: {
17332 Value *Op0 = EmitScalarExpr(E->getArg(0));
17333 llvm::Type *ArgType = Op0->getType();
17334 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17335 Value *Tmp = Builder.CreateCall(F, Op0);
17336
17337 llvm::Type *ResultType = ConvertType(E->getType());
17338 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17339 if (Result->getType() != ResultType)
17340 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17341 "cast");
17342 return Result;
17343 }
17344 case PPC::BI__builtin_ppc_cmpb: {
17345 Value *Op0 = EmitScalarExpr(E->getArg(0));
17346 Value *Op1 = EmitScalarExpr(E->getArg(1));
17347 if (getTarget().getTriple().isPPC64()) {
17348 Function *F =
17349 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17350 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17351 }
17352 // For 32 bit, emit the code as below:
17353 // %conv = trunc i64 %a to i32
17354 // %conv1 = trunc i64 %b to i32
17355 // %shr = lshr i64 %a, 32
17356 // %conv2 = trunc i64 %shr to i32
17357 // %shr3 = lshr i64 %b, 32
17358 // %conv4 = trunc i64 %shr3 to i32
17359 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17360 // %conv5 = zext i32 %0 to i64
17361 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17362 // %conv614 = zext i32 %1 to i64
17363 // %shl = shl nuw i64 %conv614, 32
17364 // %or = or i64 %shl, %conv5
17365 // ret i64 %or
17366 Function *F =
17367 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17368 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17369 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17370 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17371 Value *ArgOneHi =
17372 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17373 Value *ArgTwoHi =
17374 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17375 Value *ResLo = Builder.CreateZExt(
17376 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17377 Value *ResHiShift = Builder.CreateZExt(
17378 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17379 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17380 return Builder.CreateOr(ResLo, ResHi);
17381 }
17382 // Copy sign
17383 case PPC::BI__builtin_vsx_xvcpsgnsp:
17384 case PPC::BI__builtin_vsx_xvcpsgndp: {
17385 llvm::Type *ResultType = ConvertType(E->getType());
17386 Value *X = EmitScalarExpr(E->getArg(0));
17387 Value *Y = EmitScalarExpr(E->getArg(1));
17388 ID = Intrinsic::copysign;
17389 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17390 return Builder.CreateCall(F, {X, Y});
17391 }
17392 // Rounding/truncation
17393 case PPC::BI__builtin_vsx_xvrspip:
17394 case PPC::BI__builtin_vsx_xvrdpip:
17395 case PPC::BI__builtin_vsx_xvrdpim:
17396 case PPC::BI__builtin_vsx_xvrspim:
17397 case PPC::BI__builtin_vsx_xvrdpi:
17398 case PPC::BI__builtin_vsx_xvrspi:
17399 case PPC::BI__builtin_vsx_xvrdpic:
17400 case PPC::BI__builtin_vsx_xvrspic:
17401 case PPC::BI__builtin_vsx_xvrdpiz:
17402 case PPC::BI__builtin_vsx_xvrspiz: {
17403 llvm::Type *ResultType = ConvertType(E->getType());
17404 Value *X = EmitScalarExpr(E->getArg(0));
17405 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17406 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17407 ID = Builder.getIsFPConstrained()
17408 ? Intrinsic::experimental_constrained_floor
17409 : Intrinsic::floor;
17410 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17411 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17412 ID = Builder.getIsFPConstrained()
17413 ? Intrinsic::experimental_constrained_round
17414 : Intrinsic::round;
17415 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17416 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17417 ID = Builder.getIsFPConstrained()
17418 ? Intrinsic::experimental_constrained_rint
17419 : Intrinsic::rint;
17420 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17421 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17422 ID = Builder.getIsFPConstrained()
17423 ? Intrinsic::experimental_constrained_ceil
17424 : Intrinsic::ceil;
17425 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17426 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17427 ID = Builder.getIsFPConstrained()
17428 ? Intrinsic::experimental_constrained_trunc
17429 : Intrinsic::trunc;
17430 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17431 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17432 : Builder.CreateCall(F, X);
17433 }
17434
17435 // Absolute value
17436 case PPC::BI__builtin_vsx_xvabsdp:
17437 case PPC::BI__builtin_vsx_xvabssp: {
17438 llvm::Type *ResultType = ConvertType(E->getType());
17439 Value *X = EmitScalarExpr(E->getArg(0));
17440 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17441 return Builder.CreateCall(F, X);
17442 }
17443
17444 // Fastmath by default
17445 case PPC::BI__builtin_ppc_recipdivf:
17446 case PPC::BI__builtin_ppc_recipdivd:
17447 case PPC::BI__builtin_ppc_rsqrtf:
17448 case PPC::BI__builtin_ppc_rsqrtd: {
17449 FastMathFlags FMF = Builder.getFastMathFlags();
17450 Builder.getFastMathFlags().setFast();
17451 llvm::Type *ResultType = ConvertType(E->getType());
17452 Value *X = EmitScalarExpr(E->getArg(0));
17453
17454 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17455 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17456 Value *Y = EmitScalarExpr(E->getArg(1));
17457 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17458 Builder.getFastMathFlags() &= (FMF);
17459 return FDiv;
17460 }
17461 auto *One = ConstantFP::get(ResultType, 1.0);
17462 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17463 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17464 Builder.getFastMathFlags() &= (FMF);
17465 return FDiv;
17466 }
17467 case PPC::BI__builtin_ppc_alignx: {
17468 Value *Op0 = EmitScalarExpr(E->getArg(0));
17469 Value *Op1 = EmitScalarExpr(E->getArg(1));
17470 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17471 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17472 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17473 llvm::Value::MaximumAlignment);
17474
17475 emitAlignmentAssumption(Op1, E->getArg(1),
17476 /*The expr loc is sufficient.*/ SourceLocation(),
17477 AlignmentCI, nullptr);
17478 return Op1;
17479 }
17480 case PPC::BI__builtin_ppc_rdlam: {
17481 Value *Op0 = EmitScalarExpr(E->getArg(0));
17482 Value *Op1 = EmitScalarExpr(E->getArg(1));
17483 Value *Op2 = EmitScalarExpr(E->getArg(2));
17484 llvm::Type *Ty = Op0->getType();
17485 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17486 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17487 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17488 return Builder.CreateAnd(Rotate, Op2);
17489 }
17490 case PPC::BI__builtin_ppc_load2r: {
17491 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17492 Value *Op0 = EmitScalarExpr(E->getArg(0));
17493 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17494 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17495 }
17496 // FMA variations
17497 case PPC::BI__builtin_ppc_fnmsub:
17498 case PPC::BI__builtin_ppc_fnmsubs:
17499 case PPC::BI__builtin_vsx_xvmaddadp:
17500 case PPC::BI__builtin_vsx_xvmaddasp:
17501 case PPC::BI__builtin_vsx_xvnmaddadp:
17502 case PPC::BI__builtin_vsx_xvnmaddasp:
17503 case PPC::BI__builtin_vsx_xvmsubadp:
17504 case PPC::BI__builtin_vsx_xvmsubasp:
17505 case PPC::BI__builtin_vsx_xvnmsubadp:
17506 case PPC::BI__builtin_vsx_xvnmsubasp: {
17507 llvm::Type *ResultType = ConvertType(E->getType());
17508 Value *X = EmitScalarExpr(E->getArg(0));
17509 Value *Y = EmitScalarExpr(E->getArg(1));
17510 Value *Z = EmitScalarExpr(E->getArg(2));
17511 llvm::Function *F;
17512 if (Builder.getIsFPConstrained())
17513 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17514 else
17515 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17516 switch (BuiltinID) {
17517 case PPC::BI__builtin_vsx_xvmaddadp:
17518 case PPC::BI__builtin_vsx_xvmaddasp:
17519 if (Builder.getIsFPConstrained())
17520 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17521 else
17522 return Builder.CreateCall(F, {X, Y, Z});
17523 case PPC::BI__builtin_vsx_xvnmaddadp:
17524 case PPC::BI__builtin_vsx_xvnmaddasp:
17525 if (Builder.getIsFPConstrained())
17526 return Builder.CreateFNeg(
17527 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17528 else
17529 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17530 case PPC::BI__builtin_vsx_xvmsubadp:
17531 case PPC::BI__builtin_vsx_xvmsubasp:
17532 if (Builder.getIsFPConstrained())
17533 return Builder.CreateConstrainedFPCall(
17534 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17535 else
17536 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17537 case PPC::BI__builtin_ppc_fnmsub:
17538 case PPC::BI__builtin_ppc_fnmsubs:
17539 case PPC::BI__builtin_vsx_xvnmsubadp:
17540 case PPC::BI__builtin_vsx_xvnmsubasp:
17541 if (Builder.getIsFPConstrained())
17542 return Builder.CreateFNeg(
17543 Builder.CreateConstrainedFPCall(
17544 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17545 "neg");
17546 else
17547 return Builder.CreateCall(
17548 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17549 }
17550 llvm_unreachable("Unknown FMA operation");
17551 return nullptr; // Suppress no-return warning
17552 }
17553
17554 case PPC::BI__builtin_vsx_insertword: {
17555 Value *Op0 = EmitScalarExpr(E->getArg(0));
17556 Value *Op1 = EmitScalarExpr(E->getArg(1));
17557 Value *Op2 = EmitScalarExpr(E->getArg(2));
17558 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17559
17560 // The third argument is a compile-time constant int. It must be clamped
17561 // to the range [0, 12].
17562 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17563 assert(ArgCI &&
17564 "Third arg to xxinsertw intrinsic must be constant integer");
17565 const int64_t MaxIndex = 12;
17566 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17567
17568 // The builtin semantics don't exactly match the xxinsertw instruction's
17569 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17570 // word from the first argument, and inserts it in the second argument. The
17571 // instruction extracts the word from its second input register and inserts
17572 // it into its first input register, so swap the first and second arguments.
17573 std::swap(Op0, Op1);
17574
17575 // Need to cast the second argument from a vector of unsigned int to a
17576 // vector of long long.
17577 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17578
17579 if (getTarget().isLittleEndian()) {
17580 // Reverse the double words in the vector we will extract from.
17581 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17582 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17583
17584 // Reverse the index.
17585 Index = MaxIndex - Index;
17586 }
17587
17588 // Intrinsic expects the first arg to be a vector of int.
17589 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17590 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17591 return Builder.CreateCall(F, {Op0, Op1, Op2});
17592 }
17593
17594 case PPC::BI__builtin_vsx_extractuword: {
17595 Value *Op0 = EmitScalarExpr(E->getArg(0));
17596 Value *Op1 = EmitScalarExpr(E->getArg(1));
17597 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17598
17599 // Intrinsic expects the first argument to be a vector of doublewords.
17600 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17601
17602 // The second argument is a compile time constant int that needs to
17603 // be clamped to the range [0, 12].
17604 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17605 assert(ArgCI &&
17606 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17607 const int64_t MaxIndex = 12;
17608 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17609
17610 if (getTarget().isLittleEndian()) {
17611 // Reverse the index.
17612 Index = MaxIndex - Index;
17613 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17614
17615 // Emit the call, then reverse the double words of the results vector.
17616 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17617
17618 Value *ShuffleCall =
17619 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17620 return ShuffleCall;
17621 } else {
17622 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17623 return Builder.CreateCall(F, {Op0, Op1});
17624 }
17625 }
17626
17627 case PPC::BI__builtin_vsx_xxpermdi: {
17628 Value *Op0 = EmitScalarExpr(E->getArg(0));
17629 Value *Op1 = EmitScalarExpr(E->getArg(1));
17630 Value *Op2 = EmitScalarExpr(E->getArg(2));
17631 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17632 assert(ArgCI && "Third arg must be constant integer!");
17633
17634 unsigned Index = ArgCI->getZExtValue();
17635 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17636 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17637
17638 // Account for endianness by treating this as just a shuffle. So we use the
17639 // same indices for both LE and BE in order to produce expected results in
17640 // both cases.
17641 int ElemIdx0 = (Index & 2) >> 1;
17642 int ElemIdx1 = 2 + (Index & 1);
17643
17644 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17645 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17646 QualType BIRetType = E->getType();
17647 auto RetTy = ConvertType(BIRetType);
17648 return Builder.CreateBitCast(ShuffleCall, RetTy);
17649 }
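// Worked example for the mask above (illustrative): Index == 3 gives
// ElemIdx0 = (3 & 2) >> 1 = 1 and ElemIdx1 = 2 + (3 & 1) = 3, i.e. doubleword
// 1 of the first operand followed by doubleword 1 of the second operand, the
// same shuffle on both endiannesses.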
17650
17651 case PPC::BI__builtin_vsx_xxsldwi: {
17652 Value *Op0 = EmitScalarExpr(E->getArg(0));
17653 Value *Op1 = EmitScalarExpr(E->getArg(1));
17654 Value *Op2 = EmitScalarExpr(E->getArg(2));
17655 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17656 assert(ArgCI && "Third argument must be a compile time constant");
17657 unsigned Index = ArgCI->getZExtValue() & 0x3;
17658 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17659 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17660
17661 // Create a shuffle mask
17662 int ElemIdx0;
17663 int ElemIdx1;
17664 int ElemIdx2;
17665 int ElemIdx3;
17666 if (getTarget().isLittleEndian()) {
17667 // Little endian element N comes from element 8+N-Index of the
17668 // concatenated wide vector (of course, using modulo arithmetic on
17669 // the total number of elements).
17670 ElemIdx0 = (8 - Index) % 8;
17671 ElemIdx1 = (9 - Index) % 8;
17672 ElemIdx2 = (10 - Index) % 8;
17673 ElemIdx3 = (11 - Index) % 8;
17674 } else {
17675 // Big endian ElemIdx<N> = Index + N
17676 ElemIdx0 = Index;
17677 ElemIdx1 = Index + 1;
17678 ElemIdx2 = Index + 2;
17679 ElemIdx3 = Index + 3;
17680 }
17681
17682 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17683 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17684 QualType BIRetType = E->getType();
17685 auto RetTy = ConvertType(BIRetType);
17686 return Builder.CreateBitCast(ShuffleCall, RetTy);
17687 }
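// Worked example for the shuffle above (illustrative): Index == 1 yields the
// mask {1, 2, 3, 4} on big-endian and {7, 0, 1, 2} on little-endian, both
// selecting the same four words of the concatenated eight-word vector.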
17688
17689 case PPC::BI__builtin_pack_vector_int128: {
17690 Value *Op0 = EmitScalarExpr(E->getArg(0));
17691 Value *Op1 = EmitScalarExpr(E->getArg(1));
17692 bool isLittleEndian = getTarget().isLittleEndian();
17693 Value *PoisonValue =
17694 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17695 Value *Res = Builder.CreateInsertElement(
17696 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17697 Res = Builder.CreateInsertElement(Res, Op1,
17698 (uint64_t)(isLittleEndian ? 0 : 1));
17699 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17700 }
17701
17702 case PPC::BI__builtin_unpack_vector_int128: {
17703 Value *Op0 = EmitScalarExpr(E->getArg(0));
17704 Value *Op1 = EmitScalarExpr(E->getArg(1));
17705 ConstantInt *Index = cast<ConstantInt>(Op1);
17706 Value *Unpacked = Builder.CreateBitCast(
17707 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17708
17709 if (getTarget().isLittleEndian())
17710 Index =
17711 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17712
17713 return Builder.CreateExtractElement(Unpacked, Index);
17714 }
17715
17716 case PPC::BI__builtin_ppc_sthcx: {
17717 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17718 Value *Op0 = EmitScalarExpr(E->getArg(0));
17719 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17720 return Builder.CreateCall(F, {Op0, Op1});
17721 }
17722
17723 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17724 // Some of the MMA instructions accumulate their result into an existing
17725 // accumulator whereas the others generate a new accumulator. So we need to
17726 // use custom code generation to expand a builtin call with a pointer to a
17727 // load (if the corresponding instruction accumulates its result) followed by
17728 // the call to the intrinsic and a store of the result.
17729#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17730 case PPC::BI__builtin_##Name:
17731#include "clang/Basic/BuiltinsPPC.def"
17732 {
17733 SmallVector<Value *, 4> Ops;
17734 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17735 if (E->getArg(i)->getType()->isArrayType())
17736 Ops.push_back(
17737 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
17738 else
17739 Ops.push_back(EmitScalarExpr(E->getArg(i)));
17740 // The first argument of these disassemble builtins is a pointer used to
17741 // store their result. However, the LLVM intrinsics return their result in
17742 // multiple return values, so here we emit code extracting those values from
17743 // the intrinsic results and storing them through that pointer.
17744 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17745 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17746 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17747 unsigned NumVecs = 2;
17748 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17749 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17750 NumVecs = 4;
17751 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17752 }
17753 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17754 Address Addr = EmitPointerWithAlignment(E->getArg(1));
17755 Value *Vec = Builder.CreateLoad(Addr);
17756 Value *Call = Builder.CreateCall(F, {Vec});
17757 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17758 Value *Ptr = Ops[0];
17759 for (unsigned i=0; i<NumVecs; i++) {
17760 Value *Vec = Builder.CreateExtractValue(Call, i);
17761 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17762 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17763 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17764 }
17765 return Call;
17766 }
17767 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17768 BuiltinID == PPC::BI__builtin_mma_build_acc) {
17769 // Reverse the order of the operands for LE, so the
17770 // same builtin call can be used on both LE and BE
17771 // without the need for the programmer to swap operands.
17772 // The operands are reversed starting from the second argument,
17773 // the first operand is the pointer to the pair/accumulator
17774 // that is being built.
17775 if (getTarget().isLittleEndian())
17776 std::reverse(Ops.begin() + 1, Ops.end());
17777 }
17778 bool Accumulate;
17779 switch (BuiltinID) {
17780 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17781 case PPC::BI__builtin_##Name: \
17782 ID = Intrinsic::ppc_##Intr; \
17783 Accumulate = Acc; \
17784 break;
17785 #include "clang/Basic/BuiltinsPPC.def"
17786 }
17787 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17788 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17789 BuiltinID == PPC::BI__builtin_mma_lxvp ||
17790 BuiltinID == PPC::BI__builtin_mma_stxvp) {
17791 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17792 BuiltinID == PPC::BI__builtin_mma_lxvp) {
17793 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17794 } else {
17795 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17796 }
17797 Ops.pop_back();
17798 llvm::Function *F = CGM.getIntrinsic(ID);
17799 return Builder.CreateCall(F, Ops, "");
17800 }
17801 SmallVector<Value*, 4> CallOps;
17802 if (Accumulate) {
17803 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17804 Value *Acc = Builder.CreateLoad(Addr);
17805 CallOps.push_back(Acc);
17806 }
17807 for (unsigned i=1; i<Ops.size(); i++)
17808 CallOps.push_back(Ops[i]);
17809 llvm::Function *F = CGM.getIntrinsic(ID);
17810 Value *Call = Builder.CreateCall(F, CallOps);
17811 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17812 }
17813
17814 case PPC::BI__builtin_ppc_compare_and_swap:
17815 case PPC::BI__builtin_ppc_compare_and_swaplp: {
17816 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17817 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17818 Value *OldVal = Builder.CreateLoad(OldValAddr);
17819 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17820 LValue LV = MakeAddrLValue(Addr, AtomicTy);
17821 Value *Op2 = EmitScalarExpr(E->getArg(2));
17822 auto Pair = EmitAtomicCompareExchange(
17823 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17824 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17825 // Unlike c11's atomic_compare_exchange, according to
17826 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17827 // > In either case, the contents of the memory location specified by addr
17828 // > are copied into the memory location specified by old_val_addr.
17829 // But it does not specify whether the store to OldValAddr is atomic or
17830 // which memory order to use. Following XL's codegen, treat it as a
17831 // normal store.
17832 Value *LoadedVal = Pair.first.getScalarVal();
17833 Builder.CreateStore(LoadedVal, OldValAddr);
17834 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17835 }
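// Hedged usage sketch (XL-compatible interface; see the IBM documentation
// linked above):
//
//   int Old = Expected;
//   if (__compare_and_swap(&Shared, &Old, Desired)) {
//     // exchange succeeded; Shared previously held Expected
//   } else {
//     // exchange failed; Old now holds the value observed in Shared
//   }
//
// As emitted here, the exchange is a monotonic cmpxchg and the write-back to
// old_val_addr is an ordinary non-atomic store.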
17836 case PPC::BI__builtin_ppc_fetch_and_add:
17837 case PPC::BI__builtin_ppc_fetch_and_addlp: {
17838 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17839 llvm::AtomicOrdering::Monotonic);
17840 }
17841 case PPC::BI__builtin_ppc_fetch_and_and:
17842 case PPC::BI__builtin_ppc_fetch_and_andlp: {
17843 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17844 llvm::AtomicOrdering::Monotonic);
17845 }
17846
17847 case PPC::BI__builtin_ppc_fetch_and_or:
17848 case PPC::BI__builtin_ppc_fetch_and_orlp: {
17849 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17850 llvm::AtomicOrdering::Monotonic);
17851 }
17852 case PPC::BI__builtin_ppc_fetch_and_swap:
17853 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17854 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17855 llvm::AtomicOrdering::Monotonic);
17856 }
17857 case PPC::BI__builtin_ppc_ldarx:
17858 case PPC::BI__builtin_ppc_lwarx:
17859 case PPC::BI__builtin_ppc_lharx:
17860 case PPC::BI__builtin_ppc_lbarx:
17861 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17862 case PPC::BI__builtin_ppc_mfspr: {
17863 Value *Op0 = EmitScalarExpr(E->getArg(0));
17864 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17865 ? Int32Ty
17866 : Int64Ty;
17867 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17868 return Builder.CreateCall(F, {Op0});
17869 }
17870 case PPC::BI__builtin_ppc_mtspr: {
17871 Value *Op0 = EmitScalarExpr(E->getArg(0));
17872 Value *Op1 = EmitScalarExpr(E->getArg(1));
17873 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17874 ? Int32Ty
17875 : Int64Ty;
17876 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17877 return Builder.CreateCall(F, {Op0, Op1});
17878 }
17879 case PPC::BI__builtin_ppc_popcntb: {
17880 Value *ArgValue = EmitScalarExpr(E->getArg(0));
17881 llvm::Type *ArgType = ArgValue->getType();
17882 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17883 return Builder.CreateCall(F, {ArgValue}, "popcntb");
17884 }
17885 case PPC::BI__builtin_ppc_mtfsf: {
17886 // The builtin takes a uint32 that needs to be cast to an
17887 // f64 to be passed to the intrinsic.
17888 Value *Op0 = EmitScalarExpr(E->getArg(0));
17889 Value *Op1 = EmitScalarExpr(E->getArg(1));
17890 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
17891 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17892 return Builder.CreateCall(F, {Op0, Cast}, "");
17893 }
17894
17895 case PPC::BI__builtin_ppc_swdiv_nochk:
17896 case PPC::BI__builtin_ppc_swdivs_nochk: {
17897 Value *Op0 = EmitScalarExpr(E->getArg(0));
17898 Value *Op1 = EmitScalarExpr(E->getArg(1));
17899 FastMathFlags FMF = Builder.getFastMathFlags();
17900 Builder.getFastMathFlags().setFast();
17901 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
17902 Builder.getFastMathFlags() &= (FMF);
17903 return FDiv;
17904 }
17905 case PPC::BI__builtin_ppc_fric:
17906 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17907 *this, E, Intrinsic::rint,
17908 Intrinsic::experimental_constrained_rint))
17909 .getScalarVal();
17910 case PPC::BI__builtin_ppc_frim:
17911 case PPC::BI__builtin_ppc_frims:
17912 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17913 *this, E, Intrinsic::floor,
17914 Intrinsic::experimental_constrained_floor))
17915 .getScalarVal();
17916 case PPC::BI__builtin_ppc_frin:
17917 case PPC::BI__builtin_ppc_frins:
17918 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17919 *this, E, Intrinsic::round,
17920 Intrinsic::experimental_constrained_round))
17921 .getScalarVal();
17922 case PPC::BI__builtin_ppc_frip:
17923 case PPC::BI__builtin_ppc_frips:
17924 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17925 *this, E, Intrinsic::ceil,
17926 Intrinsic::experimental_constrained_ceil))
17927 .getScalarVal();
17928 case PPC::BI__builtin_ppc_friz:
17929 case PPC::BI__builtin_ppc_frizs:
17930 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17931 *this, E, Intrinsic::trunc,
17932 Intrinsic::experimental_constrained_trunc))
17933 .getScalarVal();
17934 case PPC::BI__builtin_ppc_fsqrt:
17935 case PPC::BI__builtin_ppc_fsqrts:
17936 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17937 *this, E, Intrinsic::sqrt,
17938 Intrinsic::experimental_constrained_sqrt))
17939 .getScalarVal();
17940 case PPC::BI__builtin_ppc_test_data_class: {
17941 Value *Op0 = EmitScalarExpr(E->getArg(0));
17942 Value *Op1 = EmitScalarExpr(E->getArg(1));
17943 return Builder.CreateCall(
17944 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17945 {Op0, Op1}, "test_data_class");
17946 }
17947 case PPC::BI__builtin_ppc_maxfe: {
17948 Value *Op0 = EmitScalarExpr(E->getArg(0));
17949 Value *Op1 = EmitScalarExpr(E->getArg(1));
17950 Value *Op2 = EmitScalarExpr(E->getArg(2));
17951 Value *Op3 = EmitScalarExpr(E->getArg(3));
17952 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17953 {Op0, Op1, Op2, Op3});
17954 }
17955 case PPC::BI__builtin_ppc_maxfl: {
17956 Value *Op0 = EmitScalarExpr(E->getArg(0));
17957 Value *Op1 = EmitScalarExpr(E->getArg(1));
17958 Value *Op2 = EmitScalarExpr(E->getArg(2));
17959 Value *Op3 = EmitScalarExpr(E->getArg(3));
17960 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17961 {Op0, Op1, Op2, Op3});
17962 }
17963 case PPC::BI__builtin_ppc_maxfs: {
17964 Value *Op0 = EmitScalarExpr(E->getArg(0));
17965 Value *Op1 = EmitScalarExpr(E->getArg(1));
17966 Value *Op2 = EmitScalarExpr(E->getArg(2));
17967 Value *Op3 = EmitScalarExpr(E->getArg(3));
17968 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17969 {Op0, Op1, Op2, Op3});
17970 }
17971 case PPC::BI__builtin_ppc_minfe: {
17972 Value *Op0 = EmitScalarExpr(E->getArg(0));
17973 Value *Op1 = EmitScalarExpr(E->getArg(1));
17974 Value *Op2 = EmitScalarExpr(E->getArg(2));
17975 Value *Op3 = EmitScalarExpr(E->getArg(3));
17976 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17977 {Op0, Op1, Op2, Op3});
17978 }
17979 case PPC::BI__builtin_ppc_minfl: {
17980 Value *Op0 = EmitScalarExpr(E->getArg(0));
17981 Value *Op1 = EmitScalarExpr(E->getArg(1));
17982 Value *Op2 = EmitScalarExpr(E->getArg(2));
17983 Value *Op3 = EmitScalarExpr(E->getArg(3));
17984 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17985 {Op0, Op1, Op2, Op3});
17986 }
17987 case PPC::BI__builtin_ppc_minfs: {
17988 Value *Op0 = EmitScalarExpr(E->getArg(0));
17989 Value *Op1 = EmitScalarExpr(E->getArg(1));
17990 Value *Op2 = EmitScalarExpr(E->getArg(2));
17991 Value *Op3 = EmitScalarExpr(E->getArg(3));
17992 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17993 {Op0, Op1, Op2, Op3});
17994 }
17995 case PPC::BI__builtin_ppc_swdiv:
17996 case PPC::BI__builtin_ppc_swdivs: {
17997 Value *Op0 = EmitScalarExpr(E->getArg(0));
17998 Value *Op1 = EmitScalarExpr(E->getArg(1));
17999 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18000 }
18001 case PPC::BI__builtin_ppc_set_fpscr_rn:
18002 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18003 {EmitScalarExpr(E->getArg(0))});
18004 case PPC::BI__builtin_ppc_mffs:
18005 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18006 }
18007}
18008
18009namespace {
18010// If \p E is not a null pointer, insert an address space cast to match the
18011// return type of \p E if necessary.
18012Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18013 const CallExpr *E = nullptr) {
18014 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18015 auto *Call = CGF.Builder.CreateCall(F);
18016 Call->addRetAttr(
18017 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18018 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18019 if (!E)
18020 return Call;
18021 QualType BuiltinRetType = E->getType();
18022 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18023 if (RetTy == Call->getType())
18024 return Call;
18025 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18026}
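// Editor's note (illustrative sketch, not part of the original source): the
// helper above emits roughly
//   %dp = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// and, when the call expression E expects the pointer in a different address
// space, follows it with an addrspacecast to the builtin's return type.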
18027
18028Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18029 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18030 auto *Call = CGF.Builder.CreateCall(F);
18031 Call->addRetAttr(
18032 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18033 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18034 return Call;
18035}
18036
18037// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18038/// Emit code based on Code Object ABI version.
18039/// COV_4 : Emit code to use dispatch ptr
18040/// COV_5+ : Emit code to use implicitarg ptr
18041/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18042/// and use its value for COV_4 or COV_5+ approach. It is used for
18043/// compiling device libraries in an ABI-agnostic way.
18044///
18045/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18046/// clang during compilation of user code.
18047Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18048 llvm::LoadInst *LD;
18049
18050 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18051
18052 if (Cov == CodeObjectVersionKind::COV_None) {
18053 StringRef Name = "__oclc_ABI_version";
18054 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18055 if (!ABIVersionC)
18056 ABIVersionC = new llvm::GlobalVariable(
18057 CGF.CGM.getModule(), CGF.Int32Ty, false,
18058 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18059 llvm::GlobalVariable::NotThreadLocal,
18060 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18061
18062 // This load will be eliminated by the IPSCCP because it is constant
18063 // weak_odr without externally_initialized. Either changing it to weak or
18064 // adding externally_initialized will keep the load.
18065 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18066 CGF.CGM.getIntAlign());
18067
18068 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18069 ABIVersion,
18070 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18071
18072 // Indexing the implicit kernarg segment.
18073 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18074 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18075
18076 // Indexing the HSA kernel_dispatch_packet struct.
18077 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18078 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18079
18080 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18081 LD = CGF.Builder.CreateLoad(
18082 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18083 } else {
18084 Value *GEP = nullptr;
18085 if (Cov >= CodeObjectVersionKind::COV_5) {
18086 // Indexing the implicit kernarg segment.
18087 GEP = CGF.Builder.CreateConstGEP1_32(
18088 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18089 } else {
18090 // Indexing the HSA kernel_dispatch_packet struct.
18091 GEP = CGF.Builder.CreateConstGEP1_32(
18092 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18093 }
18094 LD = CGF.Builder.CreateLoad(
18095 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18096 }
18097
18098 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18099 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18100 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18101 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18102 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18103 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18104 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18105 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18106 return LD;
18107}
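// Editor's summary (not part of the original source): the net effect of
// EmitAMDGPUWorkGroupSize is a 16-bit load of the requested dimension:
//   COV_5+   : implicit kernarg segment, byte offset 12 + Index * 2
//   COV_4    : HSA kernel_dispatch_packet, byte offset 4 + Index * 2
//   COV_NONE : a runtime select between the two based on __oclc_ABI_version
// The load is annotated with range metadata [1, MaxOpenCLWorkGroupSize + 1),
// !noundef, and !invariant.load so later passes can fold or hoist it.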
18108
18109// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18110Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18111 const unsigned XOffset = 12;
18112 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18113 // Indexing the HSA kernel_dispatch_packet struct.
18114 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18115 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18116 auto *LD = CGF.Builder.CreateLoad(
18117 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18118 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18119 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18120 return LD;
18121}
18122} // namespace
18123
18124// For processing memory ordering and memory scope arguments of various
18125// amdgcn builtins.
18126// \p Order takes a C++11-compatible memory-ordering specifier and converts
18127// it into LLVM's memory ordering specifier using the atomic C ABI, and writes
18128// it to \p AO. \p Scope takes a const char * and converts it into an
18129// AMDGCN-specific SyncScopeID, which is written to \p SSID.
18130void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18131 llvm::AtomicOrdering &AO,
18132 llvm::SyncScope::ID &SSID) {
18133 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18134
18135 // Map C11/C++11 memory ordering to LLVM memory ordering
18136 assert(llvm::isValidAtomicOrderingCABI(ord));
18137 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18138 case llvm::AtomicOrderingCABI::acquire:
18139 case llvm::AtomicOrderingCABI::consume:
18140 AO = llvm::AtomicOrdering::Acquire;
18141 break;
18142 case llvm::AtomicOrderingCABI::release:
18143 AO = llvm::AtomicOrdering::Release;
18144 break;
18145 case llvm::AtomicOrderingCABI::acq_rel:
18146 AO = llvm::AtomicOrdering::AcquireRelease;
18147 break;
18148 case llvm::AtomicOrderingCABI::seq_cst:
18149 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18150 break;
18151 case llvm::AtomicOrderingCABI::relaxed:
18152 AO = llvm::AtomicOrdering::Monotonic;
18153 break;
18154 }
18155
18156 StringRef scp;
18157 llvm::getConstantStringInfo(Scope, scp);
18158 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18159}
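// Editor's example (not part of the original source): for
//   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
// ProcessOrderScopeAMDGCN sets AO to llvm::AtomicOrdering::Acquire and SSID to
// the "workgroup" sync scope, so the fence case below emits
//   fence syncscope("workgroup") acquire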
18160
18161llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18162 unsigned Idx,
18163 const CallExpr *E) {
18164 llvm::Value *Arg = nullptr;
18165 if ((ICEArguments & (1 << Idx)) == 0) {
18166 Arg = EmitScalarExpr(E->getArg(Idx));
18167 } else {
18168 // If this is required to be a constant, constant fold it so that we
18169 // know that the generated intrinsic gets a ConstantInt.
18170 std::optional<llvm::APSInt> Result =
18171 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18172 assert(Result && "Expected argument to be a constant");
18173 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18174 }
18175 return Arg;
18176}
18177
18178Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18179 if (QT->hasFloatingRepresentation()) {
18180 switch (elementCount) {
18181 case 2:
18182 return Intrinsic::dx_dot2;
18183 case 3:
18184 return Intrinsic::dx_dot3;
18185 case 4:
18186 return Intrinsic::dx_dot4;
18187 }
18188 }
18189 if (QT->hasSignedIntegerRepresentation())
18190 return Intrinsic::dx_sdot;
18191
18192 assert(QT->hasUnsignedIntegerRepresentation());
18193 return Intrinsic::dx_udot;
18194}
18195
18196Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18197 const CallExpr *E) {
18198 if (!getLangOpts().HLSL)
18199 return nullptr;
18200
18201 switch (BuiltinID) {
18202 case Builtin::BI__builtin_hlsl_elementwise_all: {
18203 Value *Op0 = EmitScalarExpr(E->getArg(0));
18204 return Builder.CreateIntrinsic(
18205 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18206 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18207 "hlsl.all");
18208 }
18209 case Builtin::BI__builtin_hlsl_elementwise_any: {
18210 Value *Op0 = EmitScalarExpr(E->getArg(0));
18211 return Builder.CreateIntrinsic(
18212 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18213 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18214 "hlsl.any");
18215 }
18216 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18217 Value *OpX = EmitScalarExpr(E->getArg(0));
18218 Value *OpMin = EmitScalarExpr(E->getArg(1));
18219 Value *OpMax = EmitScalarExpr(E->getArg(2));
18220
18221 QualType Ty = E->getArg(0)->getType();
18222 bool IsUnsigned = false;
18223 if (auto *VecTy = Ty->getAs<VectorType>())
18224 Ty = VecTy->getElementType();
18225 IsUnsigned = Ty->isUnsignedIntegerType();
18226 return Builder.CreateIntrinsic(
18227 /*ReturnType=*/OpX->getType(),
18228 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18229 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18230 }
18231 case Builtin::BI__builtin_hlsl_dot: {
18232 Value *Op0 = EmitScalarExpr(E->getArg(0));
18233 Value *Op1 = EmitScalarExpr(E->getArg(1));
18234 llvm::Type *T0 = Op0->getType();
18235 llvm::Type *T1 = Op1->getType();
18236 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18237 if (T0->isFloatingPointTy())
18238 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18239
18240 if (T0->isIntegerTy())
18241 return Builder.CreateMul(Op0, Op1, "dx.dot");
18242
18243 // Bools should have been promoted
18244 llvm_unreachable(
18245 "Scalar dot product is only supported on ints and floats.");
18246 }
18247 // A VectorSplat should have happened
18248 assert(T0->isVectorTy() && T1->isVectorTy() &&
18249 "Dot product of vector and scalar is not supported.");
18250
18251 // A vector sext or sitofp should have happened
18252 assert(T0->getScalarType() == T1->getScalarType() &&
18253 "Dot product of vectors need the same element types.");
18254
18255 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18256 [[maybe_unused]] auto *VecTy1 =
18257 E->getArg(1)->getType()->getAs<VectorType>();
18258 // An HLSLVectorTruncation should have happened
18259 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18260 "Dot product requires vectors to be of the same size.");
18261
18262 return Builder.CreateIntrinsic(
18263 /*ReturnType=*/T0->getScalarType(),
18264 getDotProductIntrinsic(E->getArg(0)->getType(),
18265 VecTy0->getNumElements()),
18266 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18267 } break;
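// Editor's example (not part of the original source): for an HLSL call such as
//   float r = dot(float3(a), float3(b));
// the case above selects Intrinsic::dx_dot3 via getDotProductIntrinsic and
// emits a call returning the scalar element type (float here), while dot() on
// two scalars is lowered directly to a single fmul (or mul for integers).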
18268 case Builtin::BI__builtin_hlsl_lerp: {
18269 Value *X = EmitScalarExpr(E->getArg(0));
18270 Value *Y = EmitScalarExpr(E->getArg(1));
18271 Value *S = EmitScalarExpr(E->getArg(2));
18272 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18273 llvm_unreachable("lerp operand must have a float representation");
18274 return Builder.CreateIntrinsic(
18275 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18276 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18277 }
18278 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18279 Value *Op0 = EmitScalarExpr(E->getArg(0));
18280 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18281 llvm_unreachable("frac operand must have a float representation");
18282 return Builder.CreateIntrinsic(
18283 /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18284 ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18285 }
18286 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18287 Value *Op0 = EmitScalarExpr(E->getArg(0));
18288 llvm::Type *Xty = Op0->getType();
18289 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18290 if (Xty->isVectorTy()) {
18291 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18292 retType = llvm::VectorType::get(
18293 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18294 }
18295 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18296 llvm_unreachable("isinf operand must have a float representation");
18297 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18298 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18299 }
18300 case Builtin::BI__builtin_hlsl_mad: {
18301 Value *M = EmitScalarExpr(E->getArg(0));
18302 Value *A = EmitScalarExpr(E->getArg(1));
18303 Value *B = EmitScalarExpr(E->getArg(2));
18304 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18305 return Builder.CreateIntrinsic(
18306 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18307 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18308
18309 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18310 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18311 return Builder.CreateIntrinsic(
18312 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18313 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18314
18315 Value *Mul = Builder.CreateNSWMul(M, A);
18316 return Builder.CreateNSWAdd(Mul, B);
18317 }
18318 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18319 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18320 return Builder.CreateIntrinsic(
18321 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18322 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18323
18324 Value *Mul = Builder.CreateNUWMul(M, A);
18325 return Builder.CreateNUWAdd(Mul, B);
18326 }
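// Editor's summary (not part of the original source): mad(m, a, b) therefore
// lowers as follows:
//   floating-point types   -> llvm.fmuladd
//   signed ints on DXIL    -> dx.imad, otherwise nsw mul + nsw add
//   unsigned ints on DXIL  -> dx.umad, otherwise nuw mul + nuw add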
18327 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18328 Value *Op0 = EmitScalarExpr(E->getArg(0));
18329 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18330 llvm_unreachable("rcp operand must have a float representation");
18331 llvm::Type *Ty = Op0->getType();
18332 llvm::Type *EltTy = Ty->getScalarType();
18333 Constant *One =
18334 Ty->isVectorTy()
18335 ? ConstantVector::getSplat(
18336 ElementCount::getFixed(
18337 dyn_cast<FixedVectorType>(Ty)->getNumElements()),
18338 ConstantFP::get(EltTy, 1.0))
18339 : ConstantFP::get(EltTy, 1.0);
18340 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18341 }
18342 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18343 Value *Op0 = EmitScalarExpr(E->getArg(0));
18344 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18345 llvm_unreachable("rsqrt operand must have a float representation");
18346 return Builder.CreateIntrinsic(
18347 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
18348 ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
18349 }
18350 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18351 auto *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
18352 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18353 {}, false, true));
18354 if (getTarget().getTriple().isSPIRVLogical())
18355 CI = dyn_cast<CallInst>(addControlledConvergenceToken(CI));
18356 return CI;
18357 }
18358 }
18359 return nullptr;
18360}
18361
18362Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18363 const CallExpr *E) {
18364 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18365 llvm::SyncScope::ID SSID;
18366 switch (BuiltinID) {
18367 case AMDGPU::BI__builtin_amdgcn_div_scale:
18368 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18369 // Translate from the intrinsic's struct return to the builtin's out
18370 // argument.
18371
18372 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18373
18374 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18375 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18376 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18377
18378 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18379 X->getType());
18380
18381 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18382
18383 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18384 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18385
18386 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18387
18388 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18389 Builder.CreateStore(FlagExt, FlagOutPtr);
18390 return Result;
18391 }
18392 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18393 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18394 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18395 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18396 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18397 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18398
18399 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18400 Src0->getType());
18401 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18402 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18403 }
18404
18405 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18406 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
18407 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18408 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
18409 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18410 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18411 llvm::SmallVector<llvm::Value *, 6> Args;
18412 // Find out if any arguments are required to be integer constant
18413 // expressions.
18414 unsigned ICEArguments = 0;
18415 ASTContext::GetBuiltinTypeError Error;
18416 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18417 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18418 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18419 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18420 }
18421 assert(Args.size() == 5 || Args.size() == 6);
18422 if (Args.size() == 5)
18423 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18424 Function *F =
18425 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18426 return Builder.CreateCall(F, Args);
18427 }
18428 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18429 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18430 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18431 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
18432 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18433 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18434 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18435 case AMDGPU::BI__builtin_amdgcn_rcp:
18436 case AMDGPU::BI__builtin_amdgcn_rcpf:
18437 case AMDGPU::BI__builtin_amdgcn_rcph:
18438 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
18439 case AMDGPU::BI__builtin_amdgcn_sqrt:
18440 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18441 case AMDGPU::BI__builtin_amdgcn_sqrth:
18442 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
18443 case AMDGPU::BI__builtin_amdgcn_rsq:
18444 case AMDGPU::BI__builtin_amdgcn_rsqf:
18445 case AMDGPU::BI__builtin_amdgcn_rsqh:
18446 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
18447 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18448 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18449 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
18450 case AMDGPU::BI__builtin_amdgcn_sinf:
18451 case AMDGPU::BI__builtin_amdgcn_sinh:
18452 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
18453 case AMDGPU::BI__builtin_amdgcn_cosf:
18454 case AMDGPU::BI__builtin_amdgcn_cosh:
18455 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
18456 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18457 return EmitAMDGPUDispatchPtr(*this, E);
18458 case AMDGPU::BI__builtin_amdgcn_logf:
18459 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
18460 case AMDGPU::BI__builtin_amdgcn_exp2f:
18461 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
18462 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18463 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
18464 case AMDGPU::BI__builtin_amdgcn_ldexp:
18465 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18466 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18467 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18468 llvm::Function *F =
18469 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18470 return Builder.CreateCall(F, {Src0, Src1});
18471 }
18472 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18473 // The raw instruction has a different behavior for out of bounds exponent
18474 // values (implicit truncation instead of saturate to short_min/short_max).
18475 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18476 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18477 llvm::Function *F =
18478 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18479 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18480 }
18481 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18482 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18483 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18484 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
18485 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18486 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18487 Value *Src0 = EmitScalarExpr(E->getArg(0));
18488 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18489 { Builder.getInt32Ty(), Src0->getType() });
18490 return Builder.CreateCall(F, Src0);
18491 }
18492 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18493 Value *Src0 = EmitScalarExpr(E->getArg(0));
18494 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18495 { Builder.getInt16Ty(), Src0->getType() });
18496 return Builder.CreateCall(F, Src0);
18497 }
18498 case AMDGPU::BI__builtin_amdgcn_fract:
18499 case AMDGPU::BI__builtin_amdgcn_fractf:
18500 case AMDGPU::BI__builtin_amdgcn_fracth:
18501 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
18502 case AMDGPU::BI__builtin_amdgcn_lerp:
18503 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
18504 case AMDGPU::BI__builtin_amdgcn_ubfe:
18505 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
18506 case AMDGPU::BI__builtin_amdgcn_sbfe:
18507 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
18508 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18509 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18510 llvm::Type *ResultType = ConvertType(E->getType());
18511 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18512 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18513 return Builder.CreateCall(F, { Src });
18514 }
18515 case AMDGPU::BI__builtin_amdgcn_uicmp:
18516 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18517 case AMDGPU::BI__builtin_amdgcn_sicmp:
18518 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18519 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18520 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18521 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18522
18523 // FIXME-GFX10: How should 32 bit mask be handled?
18524 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18525 { Builder.getInt64Ty(), Src0->getType() });
18526 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18527 }
18528 case AMDGPU::BI__builtin_amdgcn_fcmp:
18529 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18530 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18531 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18532 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18533
18534 // FIXME-GFX10: How should 32 bit mask be handled?
18535 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18536 { Builder.getInt64Ty(), Src0->getType() });
18537 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18538 }
18539 case AMDGPU::BI__builtin_amdgcn_class:
18540 case AMDGPU::BI__builtin_amdgcn_classf:
18541 case AMDGPU::BI__builtin_amdgcn_classh:
18542 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18543 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18544 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18545 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
18546 case AMDGPU::BI__builtin_amdgcn_ds_append:
18547 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18548 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18549 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18550 Value *Src0 = EmitScalarExpr(E->getArg(0));
18551 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18552 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18553 }
18554 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18555 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18556 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
18557 Intrinsic::ID Intrin;
18558 switch (BuiltinID) {
18559 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18560 Intrin = Intrinsic::amdgcn_ds_fadd;
18561 break;
18562 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18563 Intrin = Intrinsic::amdgcn_ds_fmin;
18564 break;
18565 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
18566 Intrin = Intrinsic::amdgcn_ds_fmax;
18567 break;
18568 }
18569 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18570 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18571 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18572 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18573 llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
18574 llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
18575 llvm::FunctionType *FTy = F->getFunctionType();
18576 llvm::Type *PTy = FTy->getParamType(0);
18577 Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
18578 return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
18579 }
18580 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18581 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18582 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18583 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18584 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18585 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18586 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18587 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18588 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18589 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18590 Intrinsic::ID IID;
18591 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18592 switch (BuiltinID) {
18593 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18594 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18595 IID = Intrinsic::amdgcn_global_atomic_fadd;
18596 break;
18597 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18598 ArgTy = llvm::FixedVectorType::get(
18599 llvm::Type::getHalfTy(getLLVMContext()), 2);
18600 IID = Intrinsic::amdgcn_global_atomic_fadd;
18601 break;
18602 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18603 IID = Intrinsic::amdgcn_global_atomic_fadd;
18604 break;
18605 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18606 IID = Intrinsic::amdgcn_global_atomic_fmin;
18607 break;
18608 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18609 IID = Intrinsic::amdgcn_global_atomic_fmax;
18610 break;
18611 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18612 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18613 break;
18614 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18615 IID = Intrinsic::amdgcn_flat_atomic_fmin;
18616 break;
18617 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18618 IID = Intrinsic::amdgcn_flat_atomic_fmax;
18619 break;
18620 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18621 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18622 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18623 break;
18624 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18625 ArgTy = llvm::FixedVectorType::get(
18626 llvm::Type::getHalfTy(getLLVMContext()), 2);
18627 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18628 break;
18629 }
18630 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18631 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18632 llvm::Function *F =
18633 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18634 return Builder.CreateCall(F, {Addr, Val});
18635 }
18636 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18637 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18638 Intrinsic::ID IID;
18639 switch (BuiltinID) {
18640 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18641 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18642 break;
18643 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18644 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18645 break;
18646 }
18647 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18648 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18649 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18650 return Builder.CreateCall(F, {Addr, Val});
18651 }
18652 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18653 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18654 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
18655 Intrinsic::ID IID;
18656 llvm::Type *ArgTy;
18657 switch (BuiltinID) {
18658 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18659 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18660 IID = Intrinsic::amdgcn_ds_fadd;
18661 break;
18662 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18663 ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18664 IID = Intrinsic::amdgcn_ds_fadd;
18665 break;
18666 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
18667 ArgTy = llvm::FixedVectorType::get(
18668 llvm::Type::getHalfTy(getLLVMContext()), 2);
18669 IID = Intrinsic::amdgcn_ds_fadd;
18670 break;
18671 }
18672 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18673 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18674 llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
18675 llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
18676 llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
18677 llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
18678 llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
18679 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
18680 }
18681 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18682 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18683 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18684 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: {
18685
18686 Intrinsic::ID IID;
18687 switch (BuiltinID) {
18688 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18689 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18690 IID = Intrinsic::amdgcn_global_load_tr_b64;
18691 break;
18692 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18693 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18694 IID = Intrinsic::amdgcn_global_load_tr_b128;
18695 break;
18696 }
18697 llvm::Type *LoadTy = ConvertType(E->getType());
18698 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18699 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18700 return Builder.CreateCall(F, {Addr});
18701 }
18702 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18703 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18704 {llvm::Type::getInt64Ty(getLLVMContext())});
18705 return Builder.CreateCall(F);
18706 }
18707 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18708 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18709 {llvm::Type::getInt64Ty(getLLVMContext())});
18710 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18711 return Builder.CreateCall(F, {Env});
18712 }
18713 case AMDGPU::BI__builtin_amdgcn_read_exec:
18714 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18715 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18716 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18717 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18718 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18719 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18720 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18721 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18722 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18723 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18724 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18725 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18726 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18727 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18728 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18729
18730 // The builtins take these arguments as vec4 where the last element is
18731 // ignored. The intrinsic takes them as vec3.
18732 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18733 ArrayRef<int>{0, 1, 2});
18734 RayDir =
18735 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18736 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18737 ArrayRef<int>{0, 1, 2});
18738
18739 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18740 {NodePtr->getType(), RayDir->getType()});
18741 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18742 RayInverseDir, TextureDescr});
18743 }
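// Editor's note (illustrative, not part of the original source): the vec4 ->
// vec3 narrowing above is a shufflevector that keeps only the first three
// lanes, e.g. for the ray origin:
//   %origin3 = shufflevector <4 x float> %origin4, <4 x float> %origin4,
//                            <3 x i32> <i32 0, i32 1, i32 2>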
18744
18745 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18746 SmallVector<Value *, 4> Args;
18747 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18748 Args.push_back(EmitScalarExpr(E->getArg(i)));
18749
18750 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18751 Value *Call = Builder.CreateCall(F, Args);
18752 Value *Rtn = Builder.CreateExtractValue(Call, 0);
18753 Value *A = Builder.CreateExtractValue(Call, 1);
18754 llvm::Type *RetTy = ConvertType(E->getType());
18755 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18756 (uint64_t)0);
18757 return Builder.CreateInsertElement(I0, A, 1);
18758 }
18759
18760 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18761 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18762 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18763 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18764 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18765 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18766 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18767 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18768 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18769 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18770 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18771 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18772 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18773 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18774 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18775 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18776 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18777 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18778 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18779 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18780 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18781 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18782 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18783 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18784 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18785 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18786 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18787 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18788 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18789 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18790 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18791 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18792 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18793 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18794 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18795 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18796 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18797 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18798 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18799 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18800 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18801 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18802 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18803 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18804 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18805 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18806 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18807 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18808 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18809 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18810 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18811 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18812 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18813 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18814 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18815 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18816 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18817 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18818 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18819 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
18820
18821 // These operations perform a matrix multiplication and accumulation of
18822 // the form:
18823 // D = A * B + C
18824 // We need to specify one type for the AB matrices and one for the CD
18825 // matrices. Sparse matrix operations can have different types for A and B,
18826 // as well as an additional type for the sparsity index.
18827 // The destination type should come before the types of the source operands.
18828 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18829 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18830 // There is no need for the variable opsel argument, so always set it to
18831 // "false".
18832 bool AppendFalseForOpselArg = false;
18833 unsigned BuiltinWMMAOp;
18834
18835 switch (BuiltinID) {
18836 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18837 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18838 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18839 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18840 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18841 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18842 break;
18843 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18844 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18845 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18846 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18847 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18848 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18849 break;
18850 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18851 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18852 AppendFalseForOpselArg = true;
18853 [[fallthrough]];
18854 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18855 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18856 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18857 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18858 break;
18859 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18860 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18861 AppendFalseForOpselArg = true;
18862 [[fallthrough]];
18863 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18864 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18865 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18866 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18867 break;
18868 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18869 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18870 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18871 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18872 break;
18873 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18874 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18875 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18876 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18877 break;
18878 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18879 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18880 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18881 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18882 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18883 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18884 break;
18885 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18886 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18887 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18888 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18889 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18890 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18891 break;
18892 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18893 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18894 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18895 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
18896 break;
18897 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18898 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18899 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18900 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
18901 break;
18902 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18903 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18904 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18905 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
18906 break;
18907 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18908 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18909 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18910 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
18911 break;
18912 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18913 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18914 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18915 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
18916 break;
18917 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18918 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18919 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18920 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
18921 break;
18922 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18923 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18924 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18925 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
18926 break;
18927 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18928 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18929 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18930 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
18931 break;
18932 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18933 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18934 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18935 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
18936 break;
18937 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18938 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18939 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18940 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
18941 break;
18942 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18943 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18944 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18945 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
18946 break;
18947 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18948 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18949 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18950 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
18951 break;
18952 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18953 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18954 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18955 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
18956 break;
18957 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18958 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18959 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18960 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
18961 break;
18962 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18963 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18964 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18965 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
18966 break;
18967 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18968 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
18969 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18970 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
18971 break;
18972 }
18973
18974 SmallVector<Value *, 6> Args;
18975 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18976 Args.push_back(EmitScalarExpr(E->getArg(i)));
18977 if (AppendFalseForOpselArg)
18978 Args.push_back(Builder.getFalse());
18979
18980 SmallVector<llvm::Type *, 6> ArgTypes;
18981 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
18982 ArgTypes.push_back(Args[ArgIdx]->getType());
18983
18984 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
18985 return Builder.CreateCall(F, Args);
18986 }
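// Editor's note (illustrative, not part of the original source): the indices
// in ArgsForMatchingMatrixTypes select which call arguments provide the
// overloaded matrix types when the intrinsic is mangled. For example, {2, 0}
// means the C/D accumulator type is taken from argument 2 and the A/B type
// from argument 0, so a wmma_f32_16x16x16_f16 builtin becomes a call to an
// llvm.amdgcn.wmma.f32.16x16x16.f16 intrinsic overloaded on those two types.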
18987
18988 // amdgcn workitem
18989 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
18990 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
18991 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
18992 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
18993 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
18994 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
18995
18996 // amdgcn workgroup size
18997 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
18998 return EmitAMDGPUWorkGroupSize(*this, 0);
18999 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
19000 return EmitAMDGPUWorkGroupSize(*this, 1);
19001 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
19002 return EmitAMDGPUWorkGroupSize(*this, 2);
19003
19004 // amdgcn grid size
19005 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19006 return EmitAMDGPUGridSize(*this, 0);
19007 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19008 return EmitAMDGPUGridSize(*this, 1);
19009 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19010 return EmitAMDGPUGridSize(*this, 2);
19011
19012 // r600 intrinsics
19013 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19014 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19015 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
19016 case AMDGPU::BI__builtin_r600_read_tidig_x:
19017 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19018 case AMDGPU::BI__builtin_r600_read_tidig_y:
19019 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19020 case AMDGPU::BI__builtin_r600_read_tidig_z:
19021 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19022 case AMDGPU::BI__builtin_amdgcn_alignbit: {
19023 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19024 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19025 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19026 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19027 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19028 }
19029 case AMDGPU::BI__builtin_amdgcn_fence: {
19030 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
19031 EmitScalarExpr(E->getArg(1)), AO, SSID);
19032 return Builder.CreateFence(AO, SSID);
19033 }
19034 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19035 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19036 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19037 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
19038 llvm::AtomicRMWInst::BinOp BinOp;
19039 switch (BuiltinID) {
19040 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19041 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19042 BinOp = llvm::AtomicRMWInst::UIncWrap;
19043 break;
19044 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19045 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19046 BinOp = llvm::AtomicRMWInst::UDecWrap;
19047 break;
19048 }
19049
19050 Address Ptr = CheckAtomicAlignment(*this, E);
19051 Value *Val = EmitScalarExpr(E->getArg(1));
19052
19053 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19054 EmitScalarExpr(E->getArg(3)), AO, SSID);
19055
19056 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19057 bool Volatile =
19058 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19059
19060 llvm::AtomicRMWInst *RMW =
19061 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19062 if (Volatile)
19063 RMW->setVolatile(true);
19064 return RMW;
19065 }
19066 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19067 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19068 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19069 llvm::Type *ResultType = ConvertType(E->getType());
19070 // s_sendmsg_rtn is mangled using return type only.
19071 Function *F =
19072 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19073 return Builder.CreateCall(F, {Arg});
19074 }
19075 default:
19076 return nullptr;
19077 }
19078}
19079
19080/// Handle a SystemZ function in which the final argument is a pointer
19081/// to an int that receives the post-instruction CC value. At the LLVM level
19082/// this is represented as a function that returns a {result, cc} pair.
19083static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19084 unsigned IntrinsicID,
19085 const CallExpr *E) {
19086 unsigned NumArgs = E->getNumArgs() - 1;
19087 SmallVector<Value *, 8> Args(NumArgs);
19088 for (unsigned I = 0; I < NumArgs; ++I)
19089 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19090 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19091 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19092 Value *Call = CGF.Builder.CreateCall(F, Args);
19093 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19094 CGF.Builder.CreateStore(CC, CCPtr);
19095 return CGF.Builder.CreateExtractValue(Call, 0);
19096}
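// Editor's note (illustrative sketch, not part of the original source): for a
// builtin whose last argument is `int *cc`, the helper above emits roughly
//   %pair = call { <result>, i32 } @llvm.s390.<op>(...)
//   %cc   = extractvalue %pair, 1
//   store i32 %cc, ptr %ccptr
// and returns extractvalue %pair, 0 as the builtin's result.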
19097
19098Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19099 const CallExpr *E) {
19100 switch (BuiltinID) {
19101 case SystemZ::BI__builtin_tbegin: {
19102 Value *TDB = EmitScalarExpr(E->getArg(0));
19103 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19104 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19105 return Builder.CreateCall(F, {TDB, Control});
19106 }
19107 case SystemZ::BI__builtin_tbegin_nofloat: {
19108 Value *TDB = EmitScalarExpr(E->getArg(0));
19109 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19110 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19111 return Builder.CreateCall(F, {TDB, Control});
19112 }
19113 case SystemZ::BI__builtin_tbeginc: {
19114 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19115 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19116 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19117 return Builder.CreateCall(F, {TDB, Control});
19118 }
19119 case SystemZ::BI__builtin_tabort: {
19120 Value *Data = EmitScalarExpr(E->getArg(0));
19121 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19122 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19123 }
19124 case SystemZ::BI__builtin_non_tx_store: {
19125 Value *Address = EmitScalarExpr(E->getArg(0));
19126 Value *Data = EmitScalarExpr(E->getArg(1));
19127 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19128 return Builder.CreateCall(F, {Data, Address});
19129 }
19130
19131 // Vector builtins. Note that most vector builtins are mapped automatically
19132 // to target-specific LLVM intrinsics. The ones handled specially here can
19133 // be represented via standard LLVM IR, which is preferable to enable common
19134 // LLVM optimizations.
19135
19136 case SystemZ::BI__builtin_s390_vpopctb:
19137 case SystemZ::BI__builtin_s390_vpopcth:
19138 case SystemZ::BI__builtin_s390_vpopctf:
19139 case SystemZ::BI__builtin_s390_vpopctg: {
19140 llvm::Type *ResultType = ConvertType(E->getType());
19141 Value *X = EmitScalarExpr(E->getArg(0));
19142 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19143 return Builder.CreateCall(F, X);
19144 }
19145
19146 case SystemZ::BI__builtin_s390_vclzb:
19147 case SystemZ::BI__builtin_s390_vclzh:
19148 case SystemZ::BI__builtin_s390_vclzf:
19149 case SystemZ::BI__builtin_s390_vclzg: {
19150 llvm::Type *ResultType = ConvertType(E->getType());
19151 Value *X = EmitScalarExpr(E->getArg(0));
19152 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19153 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19154 return Builder.CreateCall(F, {X, Undef});
19155 }
19156
19157 case SystemZ::BI__builtin_s390_vctzb:
19158 case SystemZ::BI__builtin_s390_vctzh:
19159 case SystemZ::BI__builtin_s390_vctzf:
19160 case SystemZ::BI__builtin_s390_vctzg: {
19161 llvm::Type *ResultType = ConvertType(E->getType());
19162 Value *X = EmitScalarExpr(E->getArg(0));
19163 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19164 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19165 return Builder.CreateCall(F, {X, Undef});
19166 }
19167
19168 case SystemZ::BI__builtin_s390_verllb:
19169 case SystemZ::BI__builtin_s390_verllh:
19170 case SystemZ::BI__builtin_s390_verllf:
19171 case SystemZ::BI__builtin_s390_verllg: {
19172 llvm::Type *ResultType = ConvertType(E->getType());
19173 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19174 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19175 // Splat scalar rotate amount to vector type.
19176 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19177 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19178 Amt = Builder.CreateVectorSplat(NumElts, Amt);
19179 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19180 return Builder.CreateCall(F, { Src, Src, Amt });
19181 }
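// A scalar sketch of why fshl(Src, Src, Amt) is a rotate: funnel-shifting a
// value against itself by Amt (taken modulo the bit width) is exactly a
// rotate-left. Illustrative only, assuming 32-bit lanes:
//
//   uint32_t rotl32(uint32_t X, uint32_t Amt) {
//     Amt &= 31; // fshl interprets the amount modulo the bit width
//     return Amt == 0 ? X : (X << Amt) | (X >> (32 - Amt));
//   }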
19182
19183 case SystemZ::BI__builtin_s390_verllvb:
19184 case SystemZ::BI__builtin_s390_verllvh:
19185 case SystemZ::BI__builtin_s390_verllvf:
19186 case SystemZ::BI__builtin_s390_verllvg: {
19187 llvm::Type *ResultType = ConvertType(E->getType());
19188 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19189 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19190 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19191 return Builder.CreateCall(F, { Src, Src, Amt });
19192 }
19193
19194 case SystemZ::BI__builtin_s390_vfsqsb:
19195 case SystemZ::BI__builtin_s390_vfsqdb: {
19196 llvm::Type *ResultType = ConvertType(E->getType());
19197 Value *X = EmitScalarExpr(E->getArg(0));
19198 if (Builder.getIsFPConstrained()) {
19199 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19200 return Builder.CreateConstrainedFPCall(F, { X });
19201 } else {
19202 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19203 return Builder.CreateCall(F, X);
19204 }
19205 }
19206 case SystemZ::BI__builtin_s390_vfmasb:
19207 case SystemZ::BI__builtin_s390_vfmadb: {
19208 llvm::Type *ResultType = ConvertType(E->getType());
19209 Value *X = EmitScalarExpr(E->getArg(0));
19210 Value *Y = EmitScalarExpr(E->getArg(1));
19211 Value *Z = EmitScalarExpr(E->getArg(2));
19212 if (Builder.getIsFPConstrained()) {
19213 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19214 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19215 } else {
19216 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19217 return Builder.CreateCall(F, {X, Y, Z});
19218 }
19219 }
19220 case SystemZ::BI__builtin_s390_vfmssb:
19221 case SystemZ::BI__builtin_s390_vfmsdb: {
19222 llvm::Type *ResultType = ConvertType(E->getType());
19223 Value *X = EmitScalarExpr(E->getArg(0));
19224 Value *Y = EmitScalarExpr(E->getArg(1));
19225 Value *Z = EmitScalarExpr(E->getArg(2));
19226 if (Builder.getIsFPConstrained()) {
19227 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19228 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19229 } else {
19230 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19231 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19232 }
19233 }
19234 case SystemZ::BI__builtin_s390_vfnmasb:
19235 case SystemZ::BI__builtin_s390_vfnmadb: {
19236 llvm::Type *ResultType = ConvertType(E->getType());
19237 Value *X = EmitScalarExpr(E->getArg(0));
19238 Value *Y = EmitScalarExpr(E->getArg(1));
19239 Value *Z = EmitScalarExpr(E->getArg(2));
19240 if (Builder.getIsFPConstrained()) {
19241 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19242 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19243 } else {
19244 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19245 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19246 }
19247 }
19248 case SystemZ::BI__builtin_s390_vfnmssb:
19249 case SystemZ::BI__builtin_s390_vfnmsdb: {
19250 llvm::Type *ResultType = ConvertType(E->getType());
19251 Value *X = EmitScalarExpr(E->getArg(0));
19252 Value *Y = EmitScalarExpr(E->getArg(1));
19253 Value *Z = EmitScalarExpr(E->getArg(2));
19254 if (Builder.getIsFPConstrained()) {
19255 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19256 Value *NegZ = Builder.CreateFNeg(Z, "sub");
19257 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19258 } else {
19259 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19260 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19261 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19262 }
19263 }
19264 case SystemZ::BI__builtin_s390_vflpsb:
19265 case SystemZ::BI__builtin_s390_vflpdb: {
19266 llvm::Type *ResultType = ConvertType(E->getType());
19267 Value *X = EmitScalarExpr(E->getArg(0));
19268 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19269 return Builder.CreateCall(F, X);
19270 }
19271 case SystemZ::BI__builtin_s390_vflnsb:
19272 case SystemZ::BI__builtin_s390_vflndb: {
19273 llvm::Type *ResultType = ConvertType(E->getType());
19274 Value *X = EmitScalarExpr(E->getArg(0));
19275 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19276 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19277 }
19278 case SystemZ::BI__builtin_s390_vfisb:
19279 case SystemZ::BI__builtin_s390_vfidb: {
19280 llvm::Type *ResultType = ConvertType(E->getType());
19281 Value *X = EmitScalarExpr(E->getArg(0));
19282 // Constant-fold the M4 and M5 mask arguments.
19283 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19284 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19285 // Check whether this instance can be represented via an LLVM standard
19286 // intrinsic. We only support some combinations of M4 and M5.
19287 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19288 Intrinsic::ID CI;
19289 switch (M4.getZExtValue()) {
19290 default: break;
19291 case 0: // IEEE-inexact exception allowed
19292 switch (M5.getZExtValue()) {
19293 default: break;
19294 case 0: ID = Intrinsic::rint;
19295 CI = Intrinsic::experimental_constrained_rint; break;
19296 }
19297 break;
19298 case 4: // IEEE-inexact exception suppressed
19299 switch (M5.getZExtValue()) {
19300 default: break;
19301 case 0: ID = Intrinsic::nearbyint;
19302 CI = Intrinsic::experimental_constrained_nearbyint; break;
19303 case 1: ID = Intrinsic::round;
19304 CI = Intrinsic::experimental_constrained_round; break;
19305 case 5: ID = Intrinsic::trunc;
19306 CI = Intrinsic::experimental_constrained_trunc; break;
19307 case 6: ID = Intrinsic::ceil;
19308 CI = Intrinsic::experimental_constrained_ceil; break;
19309 case 7: ID = Intrinsic::floor;
19310 CI = Intrinsic::experimental_constrained_floor; break;
19311 }
19312 break;
19313 }
19314 if (ID != Intrinsic::not_intrinsic) {
19315 if (Builder.getIsFPConstrained()) {
19316 Function *F = CGM.getIntrinsic(CI, ResultType);
19317 return Builder.CreateConstrainedFPCall(F, X);
19318 } else {
19319 Function *F = CGM.getIntrinsic(ID, ResultType);
19320 return Builder.CreateCall(F, X);
19321 }
19322 }
19323 switch (BuiltinID) { // FIXME: constrained version?
19324 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19325 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19326 default: llvm_unreachable("Unknown BuiltinID");
19327 }
19328 Function *F = CGM.getIntrinsic(ID);
19329 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19330 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19331 return Builder.CreateCall(F, {X, M4Value, M5Value});
19332 }
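// For reference, a scalar sketch of the M4/M5 combinations recognized above,
// written with the matching <cmath> functions (illustrative only; every other
// combination takes the fallback path through the s390 intrinsic):
//
//   double vfiScalar(double X, unsigned M4, unsigned M5) {
//     if (M4 == 0 && M5 == 0) return std::rint(X);   // inexact allowed
//     if (M4 == 4) {                                 // inexact suppressed
//       switch (M5) {
//       case 0: return std::nearbyint(X);
//       case 1: return std::round(X);
//       case 5: return std::trunc(X);
//       case 6: return std::ceil(X);
//       case 7: return std::floor(X);
//       }
//     }
//     return X; // unsupported combination: not reached on this fast path
//   }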
19333 case SystemZ::BI__builtin_s390_vfmaxsb:
19334 case SystemZ::BI__builtin_s390_vfmaxdb: {
19335 llvm::Type *ResultType = ConvertType(E->getType());
19336 Value *X = EmitScalarExpr(E->getArg(0));
19337 Value *Y = EmitScalarExpr(E->getArg(1));
19338 // Constant-fold the M4 mask argument.
19339 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19340 // Check whether this instance can be represented via an LLVM standard
19341 // intrinsic. We only support some values of M4.
19342 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19343 Intrinsic::ID CI;
19344 switch (M4.getZExtValue()) {
19345 default: break;
19346 case 4: ID = Intrinsic::maxnum;
19347 CI = Intrinsic::experimental_constrained_maxnum; break;
19348 }
19349 if (ID != Intrinsic::not_intrinsic) {
19350 if (Builder.getIsFPConstrained()) {
19351 Function *F = CGM.getIntrinsic(CI, ResultType);
19352 return Builder.CreateConstrainedFPCall(F, {X, Y});
19353 } else {
19354 Function *F = CGM.getIntrinsic(ID, ResultType);
19355 return Builder.CreateCall(F, {X, Y});
19356 }
19357 }
19358 switch (BuiltinID) {
19359 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19360 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19361 default: llvm_unreachable("Unknown BuiltinID");
19362 }
19363 Function *F = CGM.getIntrinsic(ID);
19364 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19365 return Builder.CreateCall(F, {X, Y, M4Value});
19366 }
19367 case SystemZ::BI__builtin_s390_vfminsb:
19368 case SystemZ::BI__builtin_s390_vfmindb: {
19369 llvm::Type *ResultType = ConvertType(E->getType());
19370 Value *X = EmitScalarExpr(E->getArg(0));
19371 Value *Y = EmitScalarExpr(E->getArg(1));
19372 // Constant-fold the M4 mask argument.
19373 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19374 // Check whether this instance can be represented via an LLVM standard
19375 // intrinsic. We only support some values of M4.
19376 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19377 Intrinsic::ID CI;
19378 switch (M4.getZExtValue()) {
19379 default: break;
19380 case 4: ID = Intrinsic::minnum;
19381 CI = Intrinsic::experimental_constrained_minnum; break;
19382 }
19383 if (ID != Intrinsic::not_intrinsic) {
19384 if (Builder.getIsFPConstrained()) {
19385 Function *F = CGM.getIntrinsic(CI, ResultType);
19386 return Builder.CreateConstrainedFPCall(F, {X, Y});
19387 } else {
19388 Function *F = CGM.getIntrinsic(ID, ResultType);
19389 return Builder.CreateCall(F, {X, Y});
19390 }
19391 }
19392 switch (BuiltinID) {
19393 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19394 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19395 default: llvm_unreachable("Unknown BuiltinID");
19396 }
19397 Function *F = CGM.getIntrinsic(ID);
19398 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19399 return Builder.CreateCall(F, {X, Y, M4Value});
19400 }
19401
19402 case SystemZ::BI__builtin_s390_vlbrh:
19403 case SystemZ::BI__builtin_s390_vlbrf:
19404 case SystemZ::BI__builtin_s390_vlbrg: {
19405 llvm::Type *ResultType = ConvertType(E->getType());
19406 Value *X = EmitScalarExpr(E->getArg(0));
19407 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19408 return Builder.CreateCall(F, X);
19409 }
19410
19411 // Vector intrinsics that output the post-instruction CC value.
19412
19413#define INTRINSIC_WITH_CC(NAME) \
19414 case SystemZ::BI__builtin_##NAME: \
19415 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
19416
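// For reference, one expansion of INTRINSIC_WITH_CC (shown only as a comment):
//   INTRINSIC_WITH_CC(s390_vpkshs);
// expands to
//   case SystemZ::BI__builtin_s390_vpkshs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);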
19417 INTRINSIC_WITH_CC(s390_vpkshs);
19418 INTRINSIC_WITH_CC(s390_vpksfs);
19419 INTRINSIC_WITH_CC(s390_vpksgs);
19420
19421 INTRINSIC_WITH_CC(s390_vpklshs);
19422 INTRINSIC_WITH_CC(s390_vpklsfs);
19423 INTRINSIC_WITH_CC(s390_vpklsgs);
19424
19425 INTRINSIC_WITH_CC(s390_vceqbs);
19426 INTRINSIC_WITH_CC(s390_vceqhs);
19427 INTRINSIC_WITH_CC(s390_vceqfs);
19428 INTRINSIC_WITH_CC(s390_vceqgs);
19429
19430 INTRINSIC_WITH_CC(s390_vchbs);
19431 INTRINSIC_WITH_CC(s390_vchhs);
19432 INTRINSIC_WITH_CC(s390_vchfs);
19433 INTRINSIC_WITH_CC(s390_vchgs);
19434
19435 INTRINSIC_WITH_CC(s390_vchlbs);
19436 INTRINSIC_WITH_CC(s390_vchlhs);
19437 INTRINSIC_WITH_CC(s390_vchlfs);
19438 INTRINSIC_WITH_CC(s390_vchlgs);
19439
19440 INTRINSIC_WITH_CC(s390_vfaebs);
19441 INTRINSIC_WITH_CC(s390_vfaehs);
19442 INTRINSIC_WITH_CC(s390_vfaefs);
19443
19444 INTRINSIC_WITH_CC(s390_vfaezbs);
19445 INTRINSIC_WITH_CC(s390_vfaezhs);
19446 INTRINSIC_WITH_CC(s390_vfaezfs);
19447
19448 INTRINSIC_WITH_CC(s390_vfeebs);
19449 INTRINSIC_WITH_CC(s390_vfeehs);
19450 INTRINSIC_WITH_CC(s390_vfeefs);
19451
19452 INTRINSIC_WITH_CC(s390_vfeezbs);
19453 INTRINSIC_WITH_CC(s390_vfeezhs);
19454 INTRINSIC_WITH_CC(s390_vfeezfs);
19455
19456 INTRINSIC_WITH_CC(s390_vfenebs);
19457 INTRINSIC_WITH_CC(s390_vfenehs);
19458 INTRINSIC_WITH_CC(s390_vfenefs);
19459
19460 INTRINSIC_WITH_CC(s390_vfenezbs);
19461 INTRINSIC_WITH_CC(s390_vfenezhs);
19462 INTRINSIC_WITH_CC(s390_vfenezfs);
19463
19464 INTRINSIC_WITH_CC(s390_vistrbs);
19465 INTRINSIC_WITH_CC(s390_vistrhs);
19466 INTRINSIC_WITH_CC(s390_vistrfs);
19467
19468 INTRINSIC_WITH_CC(s390_vstrcbs);
19469 INTRINSIC_WITH_CC(s390_vstrchs);
19470 INTRINSIC_WITH_CC(s390_vstrcfs);
19471
19472 INTRINSIC_WITH_CC(s390_vstrczbs);
19473 INTRINSIC_WITH_CC(s390_vstrczhs);
19474 INTRINSIC_WITH_CC(s390_vstrczfs);
19475
19476 INTRINSIC_WITH_CC(s390_vfcesbs);
19477 INTRINSIC_WITH_CC(s390_vfcedbs);
19478 INTRINSIC_WITH_CC(s390_vfchsbs);
19479 INTRINSIC_WITH_CC(s390_vfchdbs);
19480 INTRINSIC_WITH_CC(s390_vfchesbs);
19481 INTRINSIC_WITH_CC(s390_vfchedbs);
19482
19483 INTRINSIC_WITH_CC(s390_vftcisb);
19484 INTRINSIC_WITH_CC(s390_vftcidb);
19485
19486 INTRINSIC_WITH_CC(s390_vstrsb);
19487 INTRINSIC_WITH_CC(s390_vstrsh);
19488 INTRINSIC_WITH_CC(s390_vstrsf);
19489
19490 INTRINSIC_WITH_CC(s390_vstrszb);
19491 INTRINSIC_WITH_CC(s390_vstrszh);
19492 INTRINSIC_WITH_CC(s390_vstrszf);
19493
19494#undef INTRINSIC_WITH_CC
19495
19496 default:
19497 return nullptr;
19498 }
19499}
19500
19501namespace {
19502// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
19503struct NVPTXMmaLdstInfo {
19504 unsigned NumResults; // Number of elements to load/store
19505 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
19506 unsigned IID_col;
19507 unsigned IID_row;
19508};
19509
19510#define MMA_INTR(geom_op_type, layout) \
19511 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19512#define MMA_LDST(n, geom_op_type) \
19513 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
19514
19515static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19516 switch (BuiltinID) {
19517 // FP MMA loads
19518 case NVPTX::BI__hmma_m16n16k16_ld_a:
19519 return MMA_LDST(8, m16n16k16_load_a_f16);
19520 case NVPTX::BI__hmma_m16n16k16_ld_b:
19521 return MMA_LDST(8, m16n16k16_load_b_f16);
19522 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19523 return MMA_LDST(4, m16n16k16_load_c_f16);
19524 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19525 return MMA_LDST(8, m16n16k16_load_c_f32);
19526 case NVPTX::BI__hmma_m32n8k16_ld_a:
19527 return MMA_LDST(8, m32n8k16_load_a_f16);
19528 case NVPTX::BI__hmma_m32n8k16_ld_b:
19529 return MMA_LDST(8, m32n8k16_load_b_f16);
19530 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19531 return MMA_LDST(4, m32n8k16_load_c_f16);
19532 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19533 return MMA_LDST(8, m32n8k16_load_c_f32);
19534 case NVPTX::BI__hmma_m8n32k16_ld_a:
19535 return MMA_LDST(8, m8n32k16_load_a_f16);
19536 case NVPTX::BI__hmma_m8n32k16_ld_b:
19537 return MMA_LDST(8, m8n32k16_load_b_f16);
19538 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19539 return MMA_LDST(4, m8n32k16_load_c_f16);
19540 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19541 return MMA_LDST(8, m8n32k16_load_c_f32);
19542
19543 // Integer MMA loads
19544 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19545 return MMA_LDST(2, m16n16k16_load_a_s8);
19546 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19547 return MMA_LDST(2, m16n16k16_load_a_u8);
19548 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19549 return MMA_LDST(2, m16n16k16_load_b_s8);
19550 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19551 return MMA_LDST(2, m16n16k16_load_b_u8);
19552 case NVPTX::BI__imma_m16n16k16_ld_c:
19553 return MMA_LDST(8, m16n16k16_load_c_s32);
19554 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19555 return MMA_LDST(4, m32n8k16_load_a_s8);
19556 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19557 return MMA_LDST(4, m32n8k16_load_a_u8);
19558 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19559 return MMA_LDST(1, m32n8k16_load_b_s8);
19560 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19561 return MMA_LDST(1, m32n8k16_load_b_u8);
19562 case NVPTX::BI__imma_m32n8k16_ld_c:
19563 return MMA_LDST(8, m32n8k16_load_c_s32);
19564 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19565 return MMA_LDST(1, m8n32k16_load_a_s8);
19566 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19567 return MMA_LDST(1, m8n32k16_load_a_u8);
19568 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19569 return MMA_LDST(4, m8n32k16_load_b_s8);
19570 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19571 return MMA_LDST(4, m8n32k16_load_b_u8);
19572 case NVPTX::BI__imma_m8n32k16_ld_c:
19573 return MMA_LDST(8, m8n32k16_load_c_s32);
19574
19575 // Sub-integer MMA loads.
19576 // Only row/col layout is supported by A/B fragments.
19577 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19578 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19579 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19580 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19581 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19582 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19583 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19584 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19585 case NVPTX::BI__imma_m8n8k32_ld_c:
19586 return MMA_LDST(2, m8n8k32_load_c_s32);
19587 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19588 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19589 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19590 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19591 case NVPTX::BI__bmma_m8n8k128_ld_c:
19592 return MMA_LDST(2, m8n8k128_load_c_s32);
19593
19594 // Double MMA loads
19595 case NVPTX::BI__dmma_m8n8k4_ld_a:
19596 return MMA_LDST(1, m8n8k4_load_a_f64);
19597 case NVPTX::BI__dmma_m8n8k4_ld_b:
19598 return MMA_LDST(1, m8n8k4_load_b_f64);
19599 case NVPTX::BI__dmma_m8n8k4_ld_c:
19600 return MMA_LDST(2, m8n8k4_load_c_f64);
19601
19602 // Alternate float MMA loads
19603 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19604 return MMA_LDST(4, m16n16k16_load_a_bf16);
19605 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19606 return MMA_LDST(4, m16n16k16_load_b_bf16);
19607 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19608 return MMA_LDST(2, m8n32k16_load_a_bf16);
19609 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19610 return MMA_LDST(8, m8n32k16_load_b_bf16);
19611 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19612 return MMA_LDST(8, m32n8k16_load_a_bf16);
19613 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19614 return MMA_LDST(2, m32n8k16_load_b_bf16);
19615 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19616 return MMA_LDST(4, m16n16k8_load_a_tf32);
19617 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19618 return MMA_LDST(4, m16n16k8_load_b_tf32);
19619 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19620 return MMA_LDST(8, m16n16k8_load_c_f32);
19621
19622 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19623 // PTX and LLVM IR, where stores always use fragment D, NVCC builtins always
19624 // use fragment C for both loads and stores.
19625 // FP MMA stores.
19626 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19627 return MMA_LDST(4, m16n16k16_store_d_f16);
19628 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19629 return MMA_LDST(8, m16n16k16_store_d_f32);
19630 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19631 return MMA_LDST(4, m32n8k16_store_d_f16);
19632 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19633 return MMA_LDST(8, m32n8k16_store_d_f32);
19634 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19635 return MMA_LDST(4, m8n32k16_store_d_f16);
19636 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19637 return MMA_LDST(8, m8n32k16_store_d_f32);
19638
19639 // Integer and sub-integer MMA stores.
19640 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
19641 // name, integer loads/stores use LLVM's i32.
19642 case NVPTX::BI__imma_m16n16k16_st_c_i32:
19643 return MMA_LDST(8, m16n16k16_store_d_s32);
19644 case NVPTX::BI__imma_m32n8k16_st_c_i32:
19645 return MMA_LDST(8, m32n8k16_store_d_s32);
19646 case NVPTX::BI__imma_m8n32k16_st_c_i32:
19647 return MMA_LDST(8, m8n32k16_store_d_s32);
19648 case NVPTX::BI__imma_m8n8k32_st_c_i32:
19649 return MMA_LDST(2, m8n8k32_store_d_s32);
19650 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19651 return MMA_LDST(2, m8n8k128_store_d_s32);
19652
19653 // Double MMA store
19654 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19655 return MMA_LDST(2, m8n8k4_store_d_f64);
19656
19657 // Alternate float MMA store
19658 case NVPTX::BI__mma_m16n16k8_st_c_f32:
19659 return MMA_LDST(8, m16n16k8_store_d_f32);
19660
19661 default:
19662 llvm_unreachable("Unknown MMA builtin");
19663 }
19664}
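// To make the naming quirk noted above concrete, reading one MMA_LDST entry
// through MMA_INTR (shown only as a comment): the *_st_c_* builtin
//   NVPTX::BI__hmma_m16n16k16_st_c_f32
// maps to the *_store_d_* intrinsics
//   Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride   (col layout)
//   Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride   (row layout)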
19665#undef MMA_LDST
19666#undef MMA_INTR
19667
19668
19669struct NVPTXMmaInfo {
19670 unsigned NumEltsA;
19671 unsigned NumEltsB;
19672 unsigned NumEltsC;
19673 unsigned NumEltsD;
19674
19675 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19676 // over 'col' for layout. The index of non-satf variants is expected to match
19677 // the undocumented layout constants used by CUDA's mma.hpp.
19678 std::array<unsigned, 8> Variants;
19679
19680 unsigned getMMAIntrinsic(int Layout, bool Satf) {
19681 unsigned Index = Layout + 4 * Satf;
19682 if (Index >= Variants.size())
19683 return 0;
19684 return Variants[Index];
19685 }
19686};
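// A minimal sketch of the Variants indexing used by getMMAIntrinsic, assuming
// the ordering documented above (row_row, row_col, col_row, col_col, followed
// by the four .satfinite variants); the helper name is invented here and the
// std::array type comes from this file's existing includes. Illustrative only.
static unsigned pickMmaVariant(const std::array<unsigned, 8> &Variants,
                               int Layout /*0..3*/, bool Satf) {
  unsigned Index = static_cast<unsigned>(Layout) + 4 * (Satf ? 1 : 0);
  return Index < Variants.size() ? Variants[Index] : 0; // 0 == unsupported
}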
19687
19688 // Returns the NVPTXMmaInfo for the builtin; its getMMAIntrinsic member yields
19689 // an intrinsic matching Layout and Satf for valid combinations, 0 otherwise.
19690static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19691 // clang-format off
19692#define MMA_VARIANTS(geom, type) \
19693 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19694 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19695 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19696 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19697#define MMA_SATF_VARIANTS(geom, type) \
19698 MMA_VARIANTS(geom, type), \
19699 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19700 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19701 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19702 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19703// Sub-integer MMA only supports row.col layout.
19704#define MMA_VARIANTS_I4(geom, type) \
19705 0, \
19706 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19707 0, \
19708 0, \
19709 0, \
19710 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19711 0, \
19712 0
19713// b1 MMA does not support .satfinite.
19714#define MMA_VARIANTS_B1_XOR(geom, type) \
19715 0, \
19716 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19717 0, \
19718 0, \
19719 0, \
19720 0, \
19721 0, \
19722 0
19723#define MMA_VARIANTS_B1_AND(geom, type) \
19724 0, \
19725 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
19726 0, \
19727 0, \
19728 0, \
19729 0, \
19730 0, \
19731 0
19732 // clang-format on
19733 switch (BuiltinID) {
19734 // FP MMA
19735 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
19736 // the NumElts* fields of the return value are ordered as A, B, C, D.
19737 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19738 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19739 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19740 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19741 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19742 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19743 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19744 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19745 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19746 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19747 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19748 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19749 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19750 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19751 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19752 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19753 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19754 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19755 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19756 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19757 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19758 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19759 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19760 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19761
19762 // Integer MMA
19763 case NVPTX::BI__imma_m16n16k16_mma_s8:
19764 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19765 case NVPTX::BI__imma_m16n16k16_mma_u8:
19766 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
19767 case NVPTX::BI__imma_m32n8k16_mma_s8:
19768 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
19769 case NVPTX::BI__imma_m32n8k16_mma_u8:
19770 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
19771 case NVPTX::BI__imma_m8n32k16_mma_s8:
19772 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
19773 case NVPTX::BI__imma_m8n32k16_mma_u8:
19774 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
19775
19776 // Sub-integer MMA
19777 case NVPTX::BI__imma_m8n8k32_mma_s4:
19778 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
19779 case NVPTX::BI__imma_m8n8k32_mma_u4:
19780 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
19781 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19782 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
19783 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19784 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
19785
19786 // Double MMA
19787 case NVPTX::BI__dmma_m8n8k4_mma_f64:
19788 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
19789
19790 // Alternate FP MMA
19791 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19792 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
19793 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19794 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
19795 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19796 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
19797 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
19798 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
19799 default:
19800 llvm_unreachable("Unexpected builtin ID.");
19801 }
19802#undef MMA_VARIANTS
19803#undef MMA_SATF_VARIANTS
19804#undef MMA_VARIANTS_I4
19805#undef MMA_VARIANTS_B1_AND
19806#undef MMA_VARIANTS_B1_XOR
19807}
19808
19809static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
19810 const CallExpr *E) {
19811 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19812 QualType ArgType = E->getArg(0)->getType();
19813 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
19814 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
19815 return CGF.Builder.CreateCall(
19816 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19817 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
19818}
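// The final operand forwarded above is the natural alignment of the loaded
// type. A small standalone sketch of the arithmetic for the vector cases,
// following the "n * alignof(t)" rule quoted in the PTX interoperability note
// further below; the helper name is invented for illustration only.
template <typename Elem>
constexpr unsigned vectorNaturalAlign(unsigned NumElems) {
  return NumElems * unsigned(alignof(Elem)); // e.g. 2 * alignof(float) == 8
}
static_assert(vectorNaturalAlign<float>(2) == 8, "float2 -> 8-byte alignment");
static_assert(vectorNaturalAlign<int>(4) == 16, "int4 -> 16-byte alignment");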
19819
19820static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
19821 const CallExpr *E) {
19822 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19823 llvm::Type *ElemTy =
19824 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19825 return CGF.Builder.CreateCall(
19826 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19827 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
19828}
19829
19830static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
19831 CodeGenFunction &CGF, const CallExpr *E,
19832 int SrcSize) {
19833 return E->getNumArgs() == 3
19834 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
19835 {CGF.EmitScalarExpr(E->getArg(0)),
19836 CGF.EmitScalarExpr(E->getArg(1)),
19837 CGF.EmitScalarExpr(E->getArg(2))})
19838 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
19839 {CGF.EmitScalarExpr(E->getArg(0)),
19840 CGF.EmitScalarExpr(E->getArg(1))});
19841}
19842
19843static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
19844 const CallExpr *E, CodeGenFunction &CGF) {
19845 auto &C = CGF.CGM.getContext();
19846 if (!(C.getLangOpts().NativeHalfType ||
19847 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
19848 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
19849 " requires native half type support.");
19850 return nullptr;
19851 }
19852
19853 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
19854 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
19855 return MakeLdgLdu(IntrinsicID, CGF, E);
19856
19857 SmallVector<Value *, 16> Args;
19858 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
19859 auto *FTy = F->getFunctionType();
19860 unsigned ICEArguments = 0;
19861 ASTContext::GetBuiltinTypeError Error;
19862 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
19863 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19864 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
19865 assert((ICEArguments & (1 << i)) == 0);
19866 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
19867 auto *PTy = FTy->getParamType(i);
19868 if (PTy != ArgValue->getType())
19869 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
19870 Args.push_back(ArgValue);
19871 }
19872
19873 return CGF.Builder.CreateCall(F, Args);
19874}
19875} // namespace
19876
19877Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
19878 const CallExpr *E) {
19879 switch (BuiltinID) {
19880 case NVPTX::BI__nvvm_atom_add_gen_i:
19881 case NVPTX::BI__nvvm_atom_add_gen_l:
19882 case NVPTX::BI__nvvm_atom_add_gen_ll:
19883 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
19884
19885 case NVPTX::BI__nvvm_atom_sub_gen_i:
19886 case NVPTX::BI__nvvm_atom_sub_gen_l:
19887 case NVPTX::BI__nvvm_atom_sub_gen_ll:
19888 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
19889
19890 case NVPTX::BI__nvvm_atom_and_gen_i:
19891 case NVPTX::BI__nvvm_atom_and_gen_l:
19892 case NVPTX::BI__nvvm_atom_and_gen_ll:
19893 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
19894
19895 case NVPTX::BI__nvvm_atom_or_gen_i:
19896 case NVPTX::BI__nvvm_atom_or_gen_l:
19897 case NVPTX::BI__nvvm_atom_or_gen_ll:
19898 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
19899
19900 case NVPTX::BI__nvvm_atom_xor_gen_i:
19901 case NVPTX::BI__nvvm_atom_xor_gen_l:
19902 case NVPTX::BI__nvvm_atom_xor_gen_ll:
19903 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
19904
19905 case NVPTX::BI__nvvm_atom_xchg_gen_i:
19906 case NVPTX::BI__nvvm_atom_xchg_gen_l:
19907 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
19908 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
19909
19910 case NVPTX::BI__nvvm_atom_max_gen_i:
19911 case NVPTX::BI__nvvm_atom_max_gen_l:
19912 case NVPTX::BI__nvvm_atom_max_gen_ll:
19913 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
19914
19915 case NVPTX::BI__nvvm_atom_max_gen_ui:
19916 case NVPTX::BI__nvvm_atom_max_gen_ul:
19917 case NVPTX::BI__nvvm_atom_max_gen_ull:
19918 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
19919
19920 case NVPTX::BI__nvvm_atom_min_gen_i:
19921 case NVPTX::BI__nvvm_atom_min_gen_l:
19922 case NVPTX::BI__nvvm_atom_min_gen_ll:
19923 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
19924
19925 case NVPTX::BI__nvvm_atom_min_gen_ui:
19926 case NVPTX::BI__nvvm_atom_min_gen_ul:
19927 case NVPTX::BI__nvvm_atom_min_gen_ull:
19928 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
19929
19930 case NVPTX::BI__nvvm_atom_cas_gen_i:
19931 case NVPTX::BI__nvvm_atom_cas_gen_l:
19932 case NVPTX::BI__nvvm_atom_cas_gen_ll:
19933 // __nvvm_atom_cas_gen_* should return the old value rather than the
19934 // success flag.
19935 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
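// A host-side sketch of the distinction noted above: a compare-and-swap yields
// both the old value and a success flag, and these builtins return the old
// value, not the flag. With std::atomic, for example, compare_exchange_strong
// leaves the old value in Expected whether or not the exchange succeeded:
//
//   int casOldValue(std::atomic<int> &Obj, int Expected, int Desired) {
//     Obj.compare_exchange_strong(Expected, Desired);
//     return Expected; // previously stored value, not the success flag
//   }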
19936
19937 case NVPTX::BI__nvvm_atom_add_gen_f:
19938 case NVPTX::BI__nvvm_atom_add_gen_d: {
19939 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
19940 Value *Val = EmitScalarExpr(E->getArg(1));
19941
19942 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
19943 AtomicOrdering::SequentiallyConsistent);
19944 }
19945
19946 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
19947 Value *Ptr = EmitScalarExpr(E->getArg(0));
19948 Value *Val = EmitScalarExpr(E->getArg(1));
19949 Function *FnALI32 =
19950 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
19951 return Builder.CreateCall(FnALI32, {Ptr, Val});
19952 }
19953
19954 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
19955 Value *Ptr = EmitScalarExpr(E->getArg(0));
19956 Value *Val = EmitScalarExpr(E->getArg(1));
19957 Function *FnALD32 =
19958 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
19959 return Builder.CreateCall(FnALD32, {Ptr, Val});
19960 }
19961
19962 case NVPTX::BI__nvvm_ldg_c:
19963 case NVPTX::BI__nvvm_ldg_sc:
19964 case NVPTX::BI__nvvm_ldg_c2:
19965 case NVPTX::BI__nvvm_ldg_sc2:
19966 case NVPTX::BI__nvvm_ldg_c4:
19967 case NVPTX::BI__nvvm_ldg_sc4:
19968 case NVPTX::BI__nvvm_ldg_s:
19969 case NVPTX::BI__nvvm_ldg_s2:
19970 case NVPTX::BI__nvvm_ldg_s4:
19971 case NVPTX::BI__nvvm_ldg_i:
19972 case NVPTX::BI__nvvm_ldg_i2:
19973 case NVPTX::BI__nvvm_ldg_i4:
19974 case NVPTX::BI__nvvm_ldg_l:
19975 case NVPTX::BI__nvvm_ldg_l2:
19976 case NVPTX::BI__nvvm_ldg_ll:
19977 case NVPTX::BI__nvvm_ldg_ll2:
19978 case NVPTX::BI__nvvm_ldg_uc:
19979 case NVPTX::BI__nvvm_ldg_uc2:
19980 case NVPTX::BI__nvvm_ldg_uc4:
19981 case NVPTX::BI__nvvm_ldg_us:
19982 case NVPTX::BI__nvvm_ldg_us2:
19983 case NVPTX::BI__nvvm_ldg_us4:
19984 case NVPTX::BI__nvvm_ldg_ui:
19985 case NVPTX::BI__nvvm_ldg_ui2:
19986 case NVPTX::BI__nvvm_ldg_ui4:
19987 case NVPTX::BI__nvvm_ldg_ul:
19988 case NVPTX::BI__nvvm_ldg_ul2:
19989 case NVPTX::BI__nvvm_ldg_ull:
19990 case NVPTX::BI__nvvm_ldg_ull2:
19991 // PTX Interoperability section 2.2: "For a vector with an even number of
19992 // elements, its alignment is set to number of elements times the alignment
19993 // of its member: n*alignof(t)."
19994 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
19995 case NVPTX::BI__nvvm_ldg_f:
19996 case NVPTX::BI__nvvm_ldg_f2:
19997 case NVPTX::BI__nvvm_ldg_f4:
19998 case NVPTX::BI__nvvm_ldg_d:
19999 case NVPTX::BI__nvvm_ldg_d2:
20000 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20001
20002 case NVPTX::BI__nvvm_ldu_c:
20003 case NVPTX::BI__nvvm_ldu_sc:
20004 case NVPTX::BI__nvvm_ldu_c2:
20005 case NVPTX::BI__nvvm_ldu_sc2:
20006 case NVPTX::BI__nvvm_ldu_c4:
20007 case NVPTX::BI__nvvm_ldu_sc4:
20008 case NVPTX::BI__nvvm_ldu_s:
20009 case NVPTX::BI__nvvm_ldu_s2:
20010 case NVPTX::BI__nvvm_ldu_s4:
20011 case NVPTX::BI__nvvm_ldu_i:
20012 case NVPTX::BI__nvvm_ldu_i2:
20013 case NVPTX::BI__nvvm_ldu_i4:
20014 case NVPTX::BI__nvvm_ldu_l:
20015 case NVPTX::BI__nvvm_ldu_l2:
20016 case NVPTX::BI__nvvm_ldu_ll:
20017 case NVPTX::BI__nvvm_ldu_ll2:
20018 case NVPTX::BI__nvvm_ldu_uc:
20019 case NVPTX::BI__nvvm_ldu_uc2:
20020 case NVPTX::BI__nvvm_ldu_uc4:
20021 case NVPTX::BI__nvvm_ldu_us:
20022 case NVPTX::BI__nvvm_ldu_us2:
20023 case NVPTX::BI__nvvm_ldu_us4:
20024 case NVPTX::BI__nvvm_ldu_ui:
20025 case NVPTX::BI__nvvm_ldu_ui2:
20026 case NVPTX::BI__nvvm_ldu_ui4:
20027 case NVPTX::BI__nvvm_ldu_ul:
20028 case NVPTX::BI__nvvm_ldu_ul2:
20029 case NVPTX::BI__nvvm_ldu_ull:
20030 case NVPTX::BI__nvvm_ldu_ull2:
20031 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20032 case NVPTX::BI__nvvm_ldu_f:
20033 case NVPTX::BI__nvvm_ldu_f2:
20034 case NVPTX::BI__nvvm_ldu_f4:
20035 case NVPTX::BI__nvvm_ldu_d:
20036 case NVPTX::BI__nvvm_ldu_d2:
20037 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20038
20039 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20040 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20041 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20042 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20043 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20044 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20045 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20046 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20047 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20048 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20049 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20050 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20051 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20052 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20053 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20054 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20055 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20056 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20057 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20058 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20059 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20060 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20061 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20062 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20063 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20064 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20065 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20066 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20067 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20068 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20069 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20070 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20071 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20072 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20073 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20074 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20075 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20076 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20077 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20078 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20079 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20080 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20081 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20082 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20083 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20084 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20085 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20086 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20087 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20088 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20089 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20090 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20091 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20092 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20093 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20094 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20095 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20096 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20097 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20098 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20099 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20100 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20101 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20102 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20103 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20104 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20105 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20106 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20107 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20108 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20109 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20110 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20111 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20112 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20113 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20114 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20115 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20116 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20117 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20118 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20119 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20120 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20121 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20122 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20123 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20124 Value *Ptr = EmitScalarExpr(E->getArg(0));
20125 llvm::Type *ElemTy =
20126 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20127 return Builder.CreateCall(
20128 CGM.getIntrinsic(
20129 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20130 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20131 }
20132 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20133 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20134 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20135 Value *Ptr = EmitScalarExpr(E->getArg(0));
20136 llvm::Type *ElemTy =
20137 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20138 return Builder.CreateCall(
20139 CGM.getIntrinsic(
20140 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20141 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20142 }
20143 case NVPTX::BI__nvvm_match_all_sync_i32p:
20144 case NVPTX::BI__nvvm_match_all_sync_i64p: {
20145 Value *Mask = EmitScalarExpr(E->getArg(0));
20146 Value *Val = EmitScalarExpr(E->getArg(1));
20147 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
20148 Value *ResultPair = Builder.CreateCall(
20149 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20150 ? Intrinsic::nvvm_match_all_sync_i32p
20151 : Intrinsic::nvvm_match_all_sync_i64p),
20152 {Mask, Val});
20153 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
20154 PredOutPtr.getElementType());
20155 Builder.CreateStore(Pred, PredOutPtr);
20156 return Builder.CreateExtractValue(ResultPair, 0);
20157 }
20158
20159 // FP MMA loads
20160 case NVPTX::BI__hmma_m16n16k16_ld_a:
20161 case NVPTX::BI__hmma_m16n16k16_ld_b:
20162 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20163 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20164 case NVPTX::BI__hmma_m32n8k16_ld_a:
20165 case NVPTX::BI__hmma_m32n8k16_ld_b:
20166 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20167 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20168 case NVPTX::BI__hmma_m8n32k16_ld_a:
20169 case NVPTX::BI__hmma_m8n32k16_ld_b:
20170 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20171 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20172 // Integer MMA loads.
20173 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20174 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20175 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20176 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20177 case NVPTX::BI__imma_m16n16k16_ld_c:
20178 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20179 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20180 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20181 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20182 case NVPTX::BI__imma_m32n8k16_ld_c:
20183 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20184 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20185 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20186 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20187 case NVPTX::BI__imma_m8n32k16_ld_c:
20188 // Sub-integer MMA loads.
20189 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20190 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20191 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20192 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20193 case NVPTX::BI__imma_m8n8k32_ld_c:
20194 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20195 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20196 case NVPTX::BI__bmma_m8n8k128_ld_c:
20197 // Double MMA loads.
20198 case NVPTX::BI__dmma_m8n8k4_ld_a:
20199 case NVPTX::BI__dmma_m8n8k4_ld_b:
20200 case NVPTX::BI__dmma_m8n8k4_ld_c:
20201 // Alternate float MMA loads.
20202 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20203 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20204 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20205 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20206 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20207 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20208 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20209 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20210 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20211 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20212 Value *Src = EmitScalarExpr(E->getArg(1));
20213 Value *Ldm = EmitScalarExpr(E->getArg(2));
20214 std::optional<llvm::APSInt> isColMajorArg =
20215 E->getArg(3)->getIntegerConstantExpr(getContext());
20216 if (!isColMajorArg)
20217 return nullptr;
20218 bool isColMajor = isColMajorArg->getSExtValue();
20219 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20220 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20221 if (IID == 0)
20222 return nullptr;
20223
20224 Value *Result =
20225 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20226
20227 // Save returned values.
20228 assert(II.NumResults);
20229 if (II.NumResults == 1) {
20230 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
20231 CharUnits::fromQuantity(4));
20232 } else {
20233 for (unsigned i = 0; i < II.NumResults; ++i) {
20234 Builder.CreateAlignedStore(
20235 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20236 Dst.getElementType()),
20237 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20238 llvm::ConstantInt::get(IntTy, i)),
20239 CharUnits::fromQuantity(4));
20240 }
20241 }
20242 return Result;
20243 }
20244
20245 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20246 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20247 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20248 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20249 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20250 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20251 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20252 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20253 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20254 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20255 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20256 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20257 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20258 Value *Dst = EmitScalarExpr(E->getArg(0));
20259 Address Src = EmitPointerWithAlignment(E->getArg(1));
20260 Value *Ldm = EmitScalarExpr(E->getArg(2));
20261 std::optional<llvm::APSInt> isColMajorArg =
20262 E->getArg(3)->getIntegerConstantExpr(getContext());
20263 if (!isColMajorArg)
20264 return nullptr;
20265 bool isColMajor = isColMajorArg->getSExtValue();
20266 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20267 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20268 if (IID == 0)
20269 return nullptr;
20270 Function *Intrinsic =
20271 CGM.getIntrinsic(IID, Dst->getType());
20272 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20273 SmallVector<Value *, 10> Values = {Dst};
20274 for (unsigned i = 0; i < II.NumResults; ++i) {
20275 Value *V = Builder.CreateAlignedLoad(
20276 Src.getElementType(),
20277 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
20278 llvm::ConstantInt::get(IntTy, i)),
20279 CharUnits::fromQuantity(4));
20280 Values.push_back(Builder.CreateBitCast(V, ParamType));
20281 }
20282 Values.push_back(Ldm);
20283 Value *Result = Builder.CreateCall(Intrinsic, Values);
20284 return Result;
20285 }
20286
20287 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20288 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
20289 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20290 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20291 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20292 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20293 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20294 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20295 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20296 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20297 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20298 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20299 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20300 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20301 case NVPTX::BI__imma_m16n16k16_mma_s8:
20302 case NVPTX::BI__imma_m16n16k16_mma_u8:
20303 case NVPTX::BI__imma_m32n8k16_mma_s8:
20304 case NVPTX::BI__imma_m32n8k16_mma_u8:
20305 case NVPTX::BI__imma_m8n32k16_mma_s8:
20306 case NVPTX::BI__imma_m8n32k16_mma_u8:
20307 case NVPTX::BI__imma_m8n8k32_mma_s4:
20308 case NVPTX::BI__imma_m8n8k32_mma_u4:
20309 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20310 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20311 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20312 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20313 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20314 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20315 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20316 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20317 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20318 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20319 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20320 std::optional<llvm::APSInt> LayoutArg =
20321 E->getArg(4)->getIntegerConstantExpr(getContext());
20322 if (!LayoutArg)
20323 return nullptr;
20324 int Layout = LayoutArg->getSExtValue();
20325 if (Layout < 0 || Layout > 3)
20326 return nullptr;
20327 llvm::APSInt SatfArg;
20328 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20329 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20330 SatfArg = 0; // .b1 does not have satf argument.
20331 else if (std::optional<llvm::APSInt> OptSatfArg =
20332 E->getArg(5)->getIntegerConstantExpr(getContext()))
20333 SatfArg = *OptSatfArg;
20334 else
20335 return nullptr;
20336 bool Satf = SatfArg.getSExtValue();
20337 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20338 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20339 if (IID == 0) // Unsupported combination of Layout/Satf.
20340 return nullptr;
20341
20342 SmallVector<Value *, 24> Values;
20343 Function *Intrinsic = CGM.getIntrinsic(IID);
20344 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20345 // Load A
20346 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20347 Value *V = Builder.CreateAlignedLoad(
20348 SrcA.getElementType(),
20349 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
20350 llvm::ConstantInt::get(IntTy, i)),
20351 CharUnits::fromQuantity(4));
20352 Values.push_back(Builder.CreateBitCast(V, AType));
20353 }
20354 // Load B
20355 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20356 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20357 Value *V = Builder.CreateAlignedLoad(
20358 SrcB.getElementType(),
20359 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
20360 llvm::ConstantInt::get(IntTy, i)),
20361 CharUnits::fromQuantity(4));
20362 Values.push_back(Builder.CreateBitCast(V, BType));
20363 }
20364 // Load C
20365 llvm::Type *CType =
20366 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20367 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20368 Value *V = Builder.CreateAlignedLoad(
20369 SrcC.getElementType(),
20370 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
20371 llvm::ConstantInt::get(IntTy, i)),
20372 CharUnits::fromQuantity(4));
20373 Values.push_back(Builder.CreateBitCast(V, CType));
20374 }
20375 Value *Result = Builder.CreateCall(Intrinsic, Values);
20376 llvm::Type *DType = Dst.getElementType();
20377 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20378 Builder.CreateAlignedStore(
20379 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20380 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20381 llvm::ConstantInt::get(IntTy, i)),
20382 CharUnits::fromQuantity(4));
20383 return Result;
20384 }
20385 // The following builtins require half type support
20386 case NVPTX::BI__nvvm_ex2_approx_f16:
20387 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20388 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20389 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20390 case NVPTX::BI__nvvm_ff2f16x2_rn:
20391 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20392 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20393 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20394 case NVPTX::BI__nvvm_ff2f16x2_rz:
20395 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20396 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20397 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20398 case NVPTX::BI__nvvm_fma_rn_f16:
20399 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20400 case NVPTX::BI__nvvm_fma_rn_f16x2:
20401 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20402 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20403 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20404 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20405 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20406 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20407 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20408 *this);
20409 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20410 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20411 *this);
20412 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20413 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20414 *this);
20415 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20416 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20417 *this);
20418 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20419 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20420 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20421 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20422 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20423 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20424 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20425 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20426 case NVPTX::BI__nvvm_fmax_f16:
20427 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20428 case NVPTX::BI__nvvm_fmax_f16x2:
20429 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20430 case NVPTX::BI__nvvm_fmax_ftz_f16:
20431 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20432 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20433 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20434 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20435 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20436 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20437 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20438 *this);
20439 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20440 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20441 E, *this);
20442 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20443 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20444 BuiltinID, E, *this);
20445 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20446 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20447 *this);
20448 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20449 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20450 E, *this);
20451 case NVPTX::BI__nvvm_fmax_nan_f16:
20452 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20453 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20454 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20455 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20456 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20457 *this);
20458 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20459 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20460 E, *this);
20461 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20462 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20463 *this);
20464 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20465 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20466 *this);
20467 case NVPTX::BI__nvvm_fmin_f16:
20468 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20469 case NVPTX::BI__nvvm_fmin_f16x2:
20470 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20471 case NVPTX::BI__nvvm_fmin_ftz_f16:
20472 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20473 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20474 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20475 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20476 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20477 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20478 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20479 *this);
20480 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20481 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20482 E, *this);
20483 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20484 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20485 BuiltinID, E, *this);
20486 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20487 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20488 *this);
20489 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20490 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20491 E, *this);
20492 case NVPTX::BI__nvvm_fmin_nan_f16:
20493 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20494 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20495 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20496 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20497 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20498 *this);
20499 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20500 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20501 E, *this);
20502 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20503 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20504 *this);
20505 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20506 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20507 *this);
20508 case NVPTX::BI__nvvm_ldg_h:
20509 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20510 case NVPTX::BI__nvvm_ldg_h2:
20511 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20512 case NVPTX::BI__nvvm_ldu_h:
20513 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20514 case NVPTX::BI__nvvm_ldu_h2: {
20515 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20516 }
20517 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20518 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20519 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20520 4);
20521 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20522 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20523 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20524 8);
20525 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20526 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20527 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20528 16);
20529 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20530 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20531 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20532 16);
20533 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20534 return Builder.CreateCall(
20535 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20536 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20537 return Builder.CreateCall(
20538 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20539 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20540 return Builder.CreateCall(
20541 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20542 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20543 return Builder.CreateCall(
20544 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20545 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20546 return Builder.CreateCall(
20547 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20548 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20549 return Builder.CreateCall(
20550 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20551 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20552 return Builder.CreateCall(
20553 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20554 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20555 return Builder.CreateCall(
20556 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20557 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20558 return Builder.CreateCall(
20559 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20560 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20561 return Builder.CreateCall(
20562 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20563 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20564 return Builder.CreateCall(
20565 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20566 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20567 return Builder.CreateCall(
20568 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20569 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20570 return Builder.CreateCall(
20571 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20572 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20573 return Builder.CreateCall(
20574 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20575 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20576 return Builder.CreateCall(
20577 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20578 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20579 return Builder.CreateCall(
20580 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20581 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20582 return Builder.CreateCall(
20583 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20584 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20585 return Builder.CreateCall(
20586 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20587 case NVPTX::BI__nvvm_is_explicit_cluster:
20588 return Builder.CreateCall(
20589 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20590 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20591 return Builder.CreateCall(
20592 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20593 EmitScalarExpr(E->getArg(0)));
20594 case NVPTX::BI__nvvm_mapa:
20595 return Builder.CreateCall(
20596 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20597 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20598 case NVPTX::BI__nvvm_mapa_shared_cluster:
20599 return Builder.CreateCall(
20600 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20601 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20602 case NVPTX::BI__nvvm_getctarank:
20603 return Builder.CreateCall(
20604 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
20605 EmitScalarExpr(E->getArg(0)));
20606 case NVPTX::BI__nvvm_getctarank_shared_cluster:
20607 return Builder.CreateCall(
20608 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
20609 EmitScalarExpr(E->getArg(0)));
20610 case NVPTX::BI__nvvm_barrier_cluster_arrive:
20611 return Builder.CreateCall(
20612 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
20613 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
20614 return Builder.CreateCall(
20615 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
20616 case NVPTX::BI__nvvm_barrier_cluster_wait:
20617 return Builder.CreateCall(
20618 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
20619 case NVPTX::BI__nvvm_fence_sc_cluster:
20620 return Builder.CreateCall(
20621 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
20622 default:
20623 return nullptr;
20624 }
20625}
20626
20627namespace {
20628struct BuiltinAlignArgs {
20629 llvm::Value *Src = nullptr;
20630 llvm::Type *SrcType = nullptr;
20631 llvm::Value *Alignment = nullptr;
20632 llvm::Value *Mask = nullptr;
20633 llvm::IntegerType *IntType = nullptr;
20634
20635 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
20636 QualType AstType = E->getArg(0)->getType();
20637 if (AstType->isArrayType())
20638 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
20639 else
20640 Src = CGF.EmitScalarExpr(E->getArg(0));
20641 SrcType = Src->getType();
20642 if (SrcType->isPointerTy()) {
20643 IntType = IntegerType::get(
20644 CGF.getLLVMContext(),
20645 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
20646 } else {
20647 assert(SrcType->isIntegerTy());
20648 IntType = cast<llvm::IntegerType>(SrcType);
20649 }
20650 Alignment = CGF.EmitScalarExpr(E->getArg(1));
20651 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
20652 auto *One = llvm::ConstantInt::get(IntType, 1);
20653 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
20654 }
20655};
20656} // namespace
20657
20658/// Generate (x & (y-1)) == 0.
20659 RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
20660 BuiltinAlignArgs Args(E, *this);
20661 llvm::Value *SrcAddress = Args.Src;
20662 if (Args.SrcType->isPointerTy())
20663 SrcAddress =
20664 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
20665 return RValue::get(Builder.CreateICmpEQ(
20666 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
20667 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
20668}
20669
20670/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
20671/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
20672/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
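/// Editorial sketch (not part of the original source; assumes a 64-bit
/// pointer index type, so the mask for a 16-byte alignment is 15):
///   p2 = __builtin_align_up(p, 16);
/// is emitted for pointer arguments roughly as
///   %over_boundary  = getelementptr inbounds i8, ptr %p, i64 15
///   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary, i64 -16)
/// while integer arguments use a plain add of the mask followed by an and
/// with the inverted mask.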
20673RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
20674 BuiltinAlignArgs Args(E, *this);
20675 llvm::Value *SrcForMask = Args.Src;
20676 if (AlignUp) {
20677 // When aligning up we have to first add the mask to ensure we go over the
20678 // next alignment value and then align down to the next valid multiple.
20679 // By adding the mask, we ensure that align_up on an already aligned
20680 // value will not change the value.
20681 if (Args.Src->getType()->isPointerTy()) {
20682 if (getLangOpts().isSignedOverflowDefined())
20683 SrcForMask =
20684 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
20685 else
20686 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20687 /*SignedIndices=*/true,
20688 /*isSubtraction=*/false,
20689 E->getExprLoc(), "over_boundary");
20690 } else {
20691 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
20692 }
20693 }
20694 // Invert the mask to only clear the lower bits.
20695 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
20696 llvm::Value *Result = nullptr;
20697 if (Args.Src->getType()->isPointerTy()) {
20698 Result = Builder.CreateIntrinsic(
20699 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20700 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20701 } else {
20702 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
20703 }
20704 assert(Result->getType() == Args.SrcType);
20705 return RValue::get(Result);
20706}
20707
20708 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20709 const CallExpr *E) {
20710 switch (BuiltinID) {
20711 case WebAssembly::BI__builtin_wasm_memory_size: {
20712 llvm::Type *ResultType = ConvertType(E->getType());
20713 Value *I = EmitScalarExpr(E->getArg(0));
20714 Function *Callee =
20715 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20716 return Builder.CreateCall(Callee, I);
20717 }
20718 case WebAssembly::BI__builtin_wasm_memory_grow: {
20719 llvm::Type *ResultType = ConvertType(E->getType());
20720 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
20721 EmitScalarExpr(E->getArg(1))};
20722 Function *Callee =
20723 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
20724 return Builder.CreateCall(Callee, Args);
20725 }
20726 case WebAssembly::BI__builtin_wasm_tls_size: {
20727 llvm::Type *ResultType = ConvertType(E->getType());
20728 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
20729 return Builder.CreateCall(Callee);
20730 }
20731 case WebAssembly::BI__builtin_wasm_tls_align: {
20732 llvm::Type *ResultType = ConvertType(E->getType());
20733 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
20734 return Builder.CreateCall(Callee);
20735 }
20736 case WebAssembly::BI__builtin_wasm_tls_base: {
20737 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
20738 return Builder.CreateCall(Callee);
20739 }
20740 case WebAssembly::BI__builtin_wasm_throw: {
20741 Value *Tag = EmitScalarExpr(E->getArg(0));
20742 Value *Obj = EmitScalarExpr(E->getArg(1));
20743 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
20744 return Builder.CreateCall(Callee, {Tag, Obj});
20745 }
20746 case WebAssembly::BI__builtin_wasm_rethrow: {
20747 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
20748 return Builder.CreateCall(Callee);
20749 }
20750 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
20751 Value *Addr = EmitScalarExpr(E->getArg(0));
20752 Value *Expected = EmitScalarExpr(E->getArg(1));
20753 Value *Timeout = EmitScalarExpr(E->getArg(2));
20754 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
20755 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20756 }
20757 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
20758 Value *Addr = EmitScalarExpr(E->getArg(0));
20759 Value *Expected = EmitScalarExpr(E->getArg(1));
20760 Value *Timeout = EmitScalarExpr(E->getArg(2));
20761 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
20762 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20763 }
20764 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
20765 Value *Addr = EmitScalarExpr(E->getArg(0));
20766 Value *Count = EmitScalarExpr(E->getArg(1));
20767 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
20768 return Builder.CreateCall(Callee, {Addr, Count});
20769 }
20770 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
20771 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
20772 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
20773 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
20774 Value *Src = EmitScalarExpr(E->getArg(0));
20775 llvm::Type *ResT = ConvertType(E->getType());
20776 Function *Callee =
20777 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
20778 return Builder.CreateCall(Callee, {Src});
20779 }
20780 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
20781 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
20782 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
20783 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
20784 Value *Src = EmitScalarExpr(E->getArg(0));
20785 llvm::Type *ResT = ConvertType(E->getType());
20786 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
20787 {ResT, Src->getType()});
20788 return Builder.CreateCall(Callee, {Src});
20789 }
20790 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
20791 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
20792 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
20793 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
20794 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
20795 Value *Src = EmitScalarExpr(E->getArg(0));
20796 llvm::Type *ResT = ConvertType(E->getType());
20797 Function *Callee =
20798 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
20799 return Builder.CreateCall(Callee, {Src});
20800 }
20801 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
20802 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
20803 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
20804 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
20805 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
20806 Value *Src = EmitScalarExpr(E->getArg(0));
20807 llvm::Type *ResT = ConvertType(E->getType());
20808 Function *Callee =
20809 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
20810 return Builder.CreateCall(Callee, {Src});
20811 }
20812 case WebAssembly::BI__builtin_wasm_min_f32:
20813 case WebAssembly::BI__builtin_wasm_min_f64:
20814 case WebAssembly::BI__builtin_wasm_min_f32x4:
20815 case WebAssembly::BI__builtin_wasm_min_f64x2: {
20816 Value *LHS = EmitScalarExpr(E->getArg(0));
20817 Value *RHS = EmitScalarExpr(E->getArg(1));
20818 Function *Callee =
20819 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
20820 return Builder.CreateCall(Callee, {LHS, RHS});
20821 }
20822 case WebAssembly::BI__builtin_wasm_max_f32:
20823 case WebAssembly::BI__builtin_wasm_max_f64:
20824 case WebAssembly::BI__builtin_wasm_max_f32x4:
20825 case WebAssembly::BI__builtin_wasm_max_f64x2: {
20826 Value *LHS = EmitScalarExpr(E->getArg(0));
20827 Value *RHS = EmitScalarExpr(E->getArg(1));
20828 Function *Callee =
20829 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
20830 return Builder.CreateCall(Callee, {LHS, RHS});
20831 }
20832 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
20833 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
20834 Value *LHS = EmitScalarExpr(E->getArg(0));
20835 Value *RHS = EmitScalarExpr(E->getArg(1));
20836 Function *Callee =
20837 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
20838 return Builder.CreateCall(Callee, {LHS, RHS});
20839 }
20840 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
20841 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
20842 Value *LHS = EmitScalarExpr(E->getArg(0));
20843 Value *RHS = EmitScalarExpr(E->getArg(1));
20844 Function *Callee =
20845 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
20846 return Builder.CreateCall(Callee, {LHS, RHS});
20847 }
20848 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20849 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20850 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20851 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20852 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20853 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20854 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20855 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
20856 unsigned IntNo;
20857 switch (BuiltinID) {
20858 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20859 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20860 IntNo = Intrinsic::ceil;
20861 break;
20862 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20863 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20864 IntNo = Intrinsic::floor;
20865 break;
20866 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20867 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20868 IntNo = Intrinsic::trunc;
20869 break;
20870 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20871 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
20872 IntNo = Intrinsic::nearbyint;
20873 break;
20874 default:
20875 llvm_unreachable("unexpected builtin ID");
20876 }
20877 Value *Value = EmitScalarExpr(E->getArg(0));
20878 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20879 return Builder.CreateCall(Callee, Value);
20880 }
20881 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
20882 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
20883 return Builder.CreateCall(Callee);
20884 }
20885 case WebAssembly::BI__builtin_wasm_ref_null_func: {
20886 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
20887 return Builder.CreateCall(Callee);
20888 }
20889 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
20890 Value *Src = EmitScalarExpr(E->getArg(0));
20891 Value *Indices = EmitScalarExpr(E->getArg(1));
20892 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
20893 return Builder.CreateCall(Callee, {Src, Indices});
20894 }
20895 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20896 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20897 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20898 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20899 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20900 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20901 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20902 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
20903 unsigned IntNo;
20904 switch (BuiltinID) {
20905 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20906 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20907 IntNo = Intrinsic::sadd_sat;
20908 break;
20909 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20910 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20911 IntNo = Intrinsic::uadd_sat;
20912 break;
20913 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20914 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20915 IntNo = Intrinsic::wasm_sub_sat_signed;
20916 break;
20917 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20918 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
20919 IntNo = Intrinsic::wasm_sub_sat_unsigned;
20920 break;
20921 default:
20922 llvm_unreachable("unexpected builtin ID");
20923 }
20924 Value *LHS = EmitScalarExpr(E->getArg(0));
20925 Value *RHS = EmitScalarExpr(E->getArg(1));
20926 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20927 return Builder.CreateCall(Callee, {LHS, RHS});
20928 }
20929 case WebAssembly::BI__builtin_wasm_abs_i8x16:
20930 case WebAssembly::BI__builtin_wasm_abs_i16x8:
20931 case WebAssembly::BI__builtin_wasm_abs_i32x4:
20932 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
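// Editorial note (not part of the original source): integer vector abs is
// open-coded below as select(v < 0, -v, v) rather than calling an intrinsic.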
20933 Value *Vec = EmitScalarExpr(E->getArg(0));
20934 Value *Neg = Builder.CreateNeg(Vec, "neg");
20935 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
20936 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
20937 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
20938 }
20939 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20940 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20941 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20942 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20943 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20944 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20945 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20946 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20947 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20948 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20949 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20950 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
20951 Value *LHS = EmitScalarExpr(E->getArg(0));
20952 Value *RHS = EmitScalarExpr(E->getArg(1));
20953 Value *ICmp;
20954 switch (BuiltinID) {
20955 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20956 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20957 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20958 ICmp = Builder.CreateICmpSLT(LHS, RHS);
20959 break;
20960 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20961 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20962 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20963 ICmp = Builder.CreateICmpULT(LHS, RHS);
20964 break;
20965 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20966 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20967 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20968 ICmp = Builder.CreateICmpSGT(LHS, RHS);
20969 break;
20970 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20971 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20972 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
20973 ICmp = Builder.CreateICmpUGT(LHS, RHS);
20974 break;
20975 default:
20976 llvm_unreachable("unexpected builtin ID");
20977 }
20978 return Builder.CreateSelect(ICmp, LHS, RHS);
20979 }
20980 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
20981 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
20982 Value *LHS = EmitScalarExpr(E->getArg(0));
20983 Value *RHS = EmitScalarExpr(E->getArg(1));
20984 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
20985 ConvertType(E->getType()));
20986 return Builder.CreateCall(Callee, {LHS, RHS});
20987 }
20988 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
20989 Value *LHS = EmitScalarExpr(E->getArg(0));
20990 Value *RHS = EmitScalarExpr(E->getArg(1));
20991 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
20992 return Builder.CreateCall(Callee, {LHS, RHS});
20993 }
20994 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20995 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20996 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20997 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
20998 Value *Vec = EmitScalarExpr(E->getArg(0));
20999 unsigned IntNo;
21000 switch (BuiltinID) {
21001 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21002 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21003 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
21004 break;
21005 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21006 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
21007 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
21008 break;
21009 default:
21010 llvm_unreachable("unexpected builtin ID");
21011 }
21012
21013 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21014 return Builder.CreateCall(Callee, Vec);
21015 }
21016 case WebAssembly::BI__builtin_wasm_bitselect: {
21017 Value *V1 = EmitScalarExpr(E->getArg(0));
21018 Value *V2 = EmitScalarExpr(E->getArg(1));
21019 Value *C = EmitScalarExpr(E->getArg(2));
21020 Function *Callee =
21021 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
21022 return Builder.CreateCall(Callee, {V1, V2, C});
21023 }
21024 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
21025 Value *LHS = EmitScalarExpr(E->getArg(0));
21026 Value *RHS = EmitScalarExpr(E->getArg(1));
21027 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
21028 return Builder.CreateCall(Callee, {LHS, RHS});
21029 }
21030 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
21031 Value *Vec = EmitScalarExpr(E->getArg(0));
21032 Function *Callee =
21033 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21034 return Builder.CreateCall(Callee, {Vec});
21035 }
21036 case WebAssembly::BI__builtin_wasm_any_true_v128:
21037 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21038 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21039 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21040 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21041 unsigned IntNo;
21042 switch (BuiltinID) {
21043 case WebAssembly::BI__builtin_wasm_any_true_v128:
21044 IntNo = Intrinsic::wasm_anytrue;
21045 break;
21046 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21047 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21048 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21049 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21050 IntNo = Intrinsic::wasm_alltrue;
21051 break;
21052 default:
21053 llvm_unreachable("unexpected builtin ID");
21054 }
21055 Value *Vec = EmitScalarExpr(E->getArg(0));
21056 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
21057 return Builder.CreateCall(Callee, {Vec});
21058 }
21059 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21060 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21061 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21062 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21063 Value *Vec = EmitScalarExpr(E->getArg(0));
21064 Function *Callee =
21065 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21066 return Builder.CreateCall(Callee, {Vec});
21067 }
21068 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21069 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21070 Value *Vec = EmitScalarExpr(E->getArg(0));
21071 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21072 return Builder.CreateCall(Callee, {Vec});
21073 }
21074 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21075 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21076 Value *Vec = EmitScalarExpr(E->getArg(0));
21077 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21078 return Builder.CreateCall(Callee, {Vec});
21079 }
21080 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21081 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21082 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21083 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21084 Value *Low = EmitScalarExpr(E->getArg(0));
21085 Value *High = EmitScalarExpr(E->getArg(1));
21086 unsigned IntNo;
21087 switch (BuiltinID) {
21088 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21089 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21090 IntNo = Intrinsic::wasm_narrow_signed;
21091 break;
21092 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21093 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21094 IntNo = Intrinsic::wasm_narrow_unsigned;
21095 break;
21096 default:
21097 llvm_unreachable("unexpected builtin ID");
21098 }
21099 Function *Callee =
21100 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
21101 return Builder.CreateCall(Callee, {Low, High});
21102 }
21103 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21104 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21105 Value *Vec = EmitScalarExpr(E->getArg(0));
21106 unsigned IntNo;
21107 switch (BuiltinID) {
21108 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21109 IntNo = Intrinsic::fptosi_sat;
21110 break;
21111 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21112 IntNo = Intrinsic::fptoui_sat;
21113 break;
21114 default:
21115 llvm_unreachable("unexpected builtin ID");
21116 }
21117 llvm::Type *SrcT = Vec->getType();
21118 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
21119 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
21120 Value *Trunc = Builder.CreateCall(Callee, Vec);
21121 Value *Splat = Constant::getNullValue(TruncT);
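// Editorial note (not part of the original source): the shuffle below widens
// the <2 x i32> truncation result to <4 x i32>, taking lanes 2 and 3 from the
// zero vector.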
21122 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
21123 }
21124 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
21125 Value *Ops[18];
21126 size_t OpIdx = 0;
21127 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
21128 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
21129 while (OpIdx < 18) {
21130 std::optional<llvm::APSInt> LaneConst =
21131 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
21132 assert(LaneConst && "Constant arg isn't actually constant?");
21133 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
21134 }
21135 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21136 return Builder.CreateCall(Callee, Ops);
21137 }
21138 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21139 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21140 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21141 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21142 Value *A = EmitScalarExpr(E->getArg(0));
21143 Value *B = EmitScalarExpr(E->getArg(1));
21144 Value *C = EmitScalarExpr(E->getArg(2));
21145 unsigned IntNo;
21146 switch (BuiltinID) {
21147 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21148 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21149 IntNo = Intrinsic::wasm_relaxed_madd;
21150 break;
21151 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21152 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21153 IntNo = Intrinsic::wasm_relaxed_nmadd;
21154 break;
21155 default:
21156 llvm_unreachable("unexpected builtin ID");
21157 }
21158 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
21159 return Builder.CreateCall(Callee, {A, B, C});
21160 }
21161 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21162 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21163 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21164 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21165 Value *A = EmitScalarExpr(E->getArg(0));
21166 Value *B = EmitScalarExpr(E->getArg(1));
21167 Value *C = EmitScalarExpr(E->getArg(2));
21168 Function *Callee =
21169 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21170 return Builder.CreateCall(Callee, {A, B, C});
21171 }
21172 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21173 Value *Src = EmitScalarExpr(E->getArg(0));
21174 Value *Indices = EmitScalarExpr(E->getArg(1));
21175 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21176 return Builder.CreateCall(Callee, {Src, Indices});
21177 }
21178 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21179 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21180 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21181 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21182 Value *LHS = EmitScalarExpr(E->getArg(0));
21183 Value *RHS = EmitScalarExpr(E->getArg(1));
21184 unsigned IntNo;
21185 switch (BuiltinID) {
21186 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21187 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21188 IntNo = Intrinsic::wasm_relaxed_min;
21189 break;
21190 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21191 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21192 IntNo = Intrinsic::wasm_relaxed_max;
21193 break;
21194 default:
21195 llvm_unreachable("unexpected builtin ID");
21196 }
21197 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21198 return Builder.CreateCall(Callee, {LHS, RHS});
21199 }
21200 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21201 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21202 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21203 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21204 Value *Vec = EmitScalarExpr(E->getArg(0));
21205 unsigned IntNo;
21206 switch (BuiltinID) {
21207 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21208 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21209 break;
21210 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21211 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21212 break;
21213 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21214 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21215 break;
21216 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21217 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21218 break;
21219 default:
21220 llvm_unreachable("unexpected builtin ID");
21221 }
21222 Function *Callee = CGM.getIntrinsic(IntNo);
21223 return Builder.CreateCall(Callee, {Vec});
21224 }
21225 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21226 Value *LHS = EmitScalarExpr(E->getArg(0));
21227 Value *RHS = EmitScalarExpr(E->getArg(1));
21228 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21229 return Builder.CreateCall(Callee, {LHS, RHS});
21230 }
21231 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21232 Value *LHS = EmitScalarExpr(E->getArg(0));
21233 Value *RHS = EmitScalarExpr(E->getArg(1));
21234 Function *Callee =
21235 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21236 return Builder.CreateCall(Callee, {LHS, RHS});
21237 }
21238 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21239 Value *LHS = EmitScalarExpr(E->getArg(0));
21240 Value *RHS = EmitScalarExpr(E->getArg(1));
21241 Value *Acc = EmitScalarExpr(E->getArg(2));
21242 Function *Callee =
21243 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21244 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21245 }
21246 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21247 Value *LHS = EmitScalarExpr(E->getArg(0));
21248 Value *RHS = EmitScalarExpr(E->getArg(1));
21249 Value *Acc = EmitScalarExpr(E->getArg(2));
21250 Function *Callee =
21251 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21252 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21253 }
21254 case WebAssembly::BI__builtin_wasm_table_get: {
21255 assert(E->getArg(0)->getType()->isArrayType());
21256 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21257 Value *Index = EmitScalarExpr(E->getArg(1));
21258 Function *Callee;
21259 if (E->getType().isWebAssemblyExternrefType())
21260 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21261 else if (E->getType().isWebAssemblyFuncrefType())
21262 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21263 else
21264 llvm_unreachable(
21265 "Unexpected reference type for __builtin_wasm_table_get");
21266 return Builder.CreateCall(Callee, {Table, Index});
21267 }
21268 case WebAssembly::BI__builtin_wasm_table_set: {
21269 assert(E->getArg(0)->getType()->isArrayType());
21270 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21271 Value *Index = EmitScalarExpr(E->getArg(1));
21272 Value *Val = EmitScalarExpr(E->getArg(2));
21273 Function *Callee;
21274 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21275 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21276 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21277 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21278 else
21279 llvm_unreachable(
21280 "Unexpected reference type for __builtin_wasm_table_set");
21281 return Builder.CreateCall(Callee, {Table, Index, Val});
21282 }
21283 case WebAssembly::BI__builtin_wasm_table_size: {
21284 assert(E->getArg(0)->getType()->isArrayType());
21285 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21286 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21287 return Builder.CreateCall(Callee, Value);
21288 }
21289 case WebAssembly::BI__builtin_wasm_table_grow: {
21290 assert(E->getArg(0)->getType()->isArrayType());
21291 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21292 Value *Val = EmitScalarExpr(E->getArg(1));
21293 Value *NElems = EmitScalarExpr(E->getArg(2));
21294
21295 Function *Callee;
21296 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21297 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21298 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21299 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21300 else
21301 llvm_unreachable(
21302 "Unexpected reference type for __builtin_wasm_table_grow");
21303
21304 return Builder.CreateCall(Callee, {Table, Val, NElems});
21305 }
21306 case WebAssembly::BI__builtin_wasm_table_fill: {
21307 assert(E->getArg(0)->getType()->isArrayType());
21308 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21309 Value *Index = EmitScalarExpr(E->getArg(1));
21310 Value *Val = EmitScalarExpr(E->getArg(2));
21311 Value *NElems = EmitScalarExpr(E->getArg(3));
21312
21313 Function *Callee;
21314 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21315 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21316 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21317 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21318 else
21319 llvm_unreachable(
21320 "Unexpected reference type for __builtin_wasm_table_fill");
21321
21322 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21323 }
21324 case WebAssembly::BI__builtin_wasm_table_copy: {
21325 assert(E->getArg(0)->getType()->isArrayType());
21326 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21327 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
21328 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21329 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21330 Value *NElems = EmitScalarExpr(E->getArg(4));
21331
21332 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21333
21334 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21335 }
21336 default:
21337 return nullptr;
21338 }
21339}
21340
21341static std::pair<Intrinsic::ID, unsigned>
21342 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21343 struct Info {
21344 unsigned BuiltinID;
21345 Intrinsic::ID IntrinsicID;
21346 unsigned VecLen;
21347 };
21348 static Info Infos[] = {
21349#define CUSTOM_BUILTIN_MAPPING(x,s) \
21350 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21351 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21352 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21353 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21354 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21355 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21356 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21357 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21358 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21359 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21360 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21361 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21362 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21363 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21364 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21365 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21366 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21367 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21368 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21369 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21370 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21371 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21372 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21373 // Legacy builtins that take a vector in place of a vector predicate.
21374 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21375 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21376 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21377 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21378 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21379 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21380 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21381 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21382#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21383#undef CUSTOM_BUILTIN_MAPPING
21384 };
21385
21386 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21387 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21388 (void)SortOnce;
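// Editorial note (not part of the original source): the comma expression
// above sorts Infos exactly once, on the first call, via static-local
// initialization, so that the llvm::lower_bound below can binary-search the
// table by BuiltinID.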
21389
21390 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21391 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21392 return {Intrinsic::not_intrinsic, 0};
21393
21394 return {F->IntrinsicID, F->VecLen};
21395}
21396
21397 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21398 const CallExpr *E) {
21399 Intrinsic::ID ID;
21400 unsigned VecLen;
21401 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21402
21403 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21404 // The base pointer is passed by address, so it needs to be loaded.
21405 Address A = EmitPointerWithAlignment(E->getArg(0));
21406 Address BP = Address(A.emitRawPointer(*this), Int8Ty, A.getAlignment());
21407 llvm::Value *Base = Builder.CreateLoad(BP);
21408 // The treatment of both loads and stores is the same: the arguments for
21409 // the builtin are the same as the arguments for the intrinsic.
21410 // Load:
21411 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21412 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21413 // Store:
21414 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21415 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
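// Editorial illustration (not part of the original source): e.g.
//   v = __builtin_HEXAGON_L2_loadri_pci(&Base, Inc, Mod, Start);
// returns the loaded word and writes the post-incremented base pointer back
// through the first argument.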
21416 SmallVector<llvm::Value*,5> Ops = { Base };
21417 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21418 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21419
21420 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21421 // The load intrinsics generate two results (Value, NewBase), stores
21422 // generate one (NewBase). The new base address needs to be stored.
21423 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21424 : Result;
21425 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21426 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21427 llvm::Value *RetVal =
21428 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21429 if (IsLoad)
21430 RetVal = Builder.CreateExtractValue(Result, 0);
21431 return RetVal;
21432 };
21433
21434 // Handle the conversion of bit-reverse load intrinsics to bit code.
21435 // The intrinsic call after this function only reads from memory and the
21436 // write to memory is handled by the store instruction.
21437 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21438 // The intrinsic generates one result, which is the new value for the base
21439 // pointer. It needs to be returned. The result of the load instruction is
21440 // passed to intrinsic by address, so the value needs to be stored.
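// Editorial illustration (not part of the original source): e.g.
//   NewBase = __builtin_brev_ldh(Base, &Val, Mod);
// stores the loaded halfword through &Val and evaluates to the updated base
// pointer.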
21441 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21442
21443 // Expressions like &(*pt++) will be incremented per evaluation.
21444 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
21445 // once per call.
21446 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21447 DestAddr = DestAddr.withElementType(Int8Ty);
21448 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
21449
21450 // Operands are Base, Dest, Modifier.
21451 // The intrinsic format in LLVM IR is defined as
21452 // { ValueType, i8* } (i8*, i32).
21453 llvm::Value *Result = Builder.CreateCall(
21454 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21455
21456 // The value needs to be stored as the variable is passed by reference.
21457 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21458
21459 // The stored value needs to be truncated to fit the destination type.
21460 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21461 // to be handled with stores of the respective destination type.
21462 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21463
21464 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21465 // The updated value of the base pointer is returned.
21466 return Builder.CreateExtractValue(Result, 1);
21467 };
21468
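// Editorial note (not part of the original source): V2Q and Q2V below convert
// between an HVX byte vector and a vector-predicate register by and'ing with
// an all-ones scalar (vandvrt / vandqrt), picking the 128B variants when
// VecLen is 128.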
21469 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21470 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21471 : Intrinsic::hexagon_V6_vandvrt;
21472 return Builder.CreateCall(CGM.getIntrinsic(ID),
21473 {Vec, Builder.getInt32(-1)});
21474 };
21475 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21476 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21477 : Intrinsic::hexagon_V6_vandqrt;
21478 return Builder.CreateCall(CGM.getIntrinsic(ID),
21479 {Pred, Builder.getInt32(-1)});
21480 };
21481
21482 switch (BuiltinID) {
21483 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21484 // and the corresponding C/C++ builtins use loads/stores to update
21485 // the predicate.
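// Editorial illustration (not part of the original source): e.g.
//   Vsum = __builtin_HEXAGON_V6_vaddcarry(Va, Vb, &Qcarry);
// loads the predicate from &Qcarry, passes it as the carry-in, and stores the
// carry-out predicate back through &Qcarry.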
21486 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21487 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21488 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21489 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21490 // Get the type from the 0-th argument.
21491 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21492 Address PredAddr =
21493 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21494 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21495 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21496 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21497
21498 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21499 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21500 PredAddr.getAlignment());
21501 return Builder.CreateExtractValue(Result, 0);
21502 }
21503 // These are identical to the builtins above, except they don't consume
21504 // input carry, only generate carry-out. Since they still produce two
21505 // outputs, generate the store of the predicate, but no load.
21506 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21507 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21508 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21509 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21510 // Get the type from the 0-th argument.
21511 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21512 Address PredAddr =
21513 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21514 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21515 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21516
21517 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21518 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21519 PredAddr.getAlignment());
21520 return Builder.CreateExtractValue(Result, 0);
21521 }
21522
21523 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21524 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21525 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21526 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21527 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21528 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21529 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21530 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21531 SmallVector<llvm::Value*,4> Ops;
21532 const Expr *PredOp = E->getArg(0);
21533 // There will be an implicit cast to a boolean vector. Strip it.
21534 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21535 if (Cast->getCastKind() == CK_BitCast)
21536 PredOp = Cast->getSubExpr();
21537 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21538 }
21539 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21540 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21541 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
21542 }
21543
21544 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21545 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21546 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21547 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21548 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21549 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21550 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21551 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21552 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21553 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21554 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21555 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21556 return MakeCircOp(ID, /*IsLoad=*/true);
21557 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21558 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21559 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21560 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21561 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21562 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21563 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21564 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
21565 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
21566 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
21567 return MakeCircOp(ID, /*IsLoad=*/false);
21568 case Hexagon::BI__builtin_brev_ldub:
21569 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
21570 case Hexagon::BI__builtin_brev_ldb:
21571 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
21572 case Hexagon::BI__builtin_brev_lduh:
21573 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
21574 case Hexagon::BI__builtin_brev_ldh:
21575 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
21576 case Hexagon::BI__builtin_brev_ldw:
21577 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
21578 case Hexagon::BI__builtin_brev_ldd:
21579 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
21580 } // switch
21581
21582 return nullptr;
21583}
21584
21585 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
21586 const CallExpr *E,
21587 ReturnValueSlot ReturnValue) {
21588 SmallVector<Value *, 4> Ops;
21589 llvm::Type *ResultType = ConvertType(E->getType());
21590
21591 // Find out if any arguments are required to be integer constant expressions.
21592 unsigned ICEArguments = 0;
21593 ASTContext::GetBuiltinTypeError Error;
21594 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
21595 if (Error == ASTContext::GE_Missing_type) {
21596 // Vector intrinsics don't have a type string.
21597 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
21598 BuiltinID <= clang::RISCV::LastRVVBuiltin);
21599 ICEArguments = 0;
21600 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
21601 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
21602 ICEArguments = 1 << 1;
21603 } else {
21604 assert(Error == ASTContext::GE_None && "Unexpected error");
21605 }
21606
21607 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
21608 ICEArguments |= (1 << 1);
21609 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
21610 ICEArguments |= (1 << 2);
21611
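// Editorial note (not part of the original source): each bit set in
// ICEArguments marks the argument at that index as an integer constant
// expression, which EmitScalarOrConstFoldImmArg below folds to a constant
// instead of emitting a runtime value.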
21612 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
21613 // Handle aggregate argument, namely RVV tuple types in segment load/store
21614 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
21615 LValue L = EmitAggExprToLValue(E->getArg(i));
21616 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
21617 Ops.push_back(AggValue);
21618 continue;
21619 }
21620 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
21621 }
21622
21623 Intrinsic::ID ID = Intrinsic::not_intrinsic;
21624 unsigned NF = 1;
21625 // The 0th bit simulates the `vta` of RVV
21626 // The 1st bit simulates the `vma` of RVV
21627 constexpr unsigned RVV_VTA = 0x1;
21628 constexpr unsigned RVV_VMA = 0x2;
21629 int PolicyAttrs = 0;
21630 bool IsMasked = false;
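// Editorial note (not part of the original source): PolicyAttrs thus encodes
// the RVV tail/mask policy, e.g. (RVV_VTA | RVV_VMA) requests the
// tail-agnostic, mask-agnostic form of a masked intrinsic.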
21631
21632 // Required for overloaded intrinsics.
21633 SmallVector<llvm::Type *, 2> IntrinsicTypes;
21634 switch (BuiltinID) {
21635 default: llvm_unreachable("unexpected builtin ID");
21636 case RISCV::BI__builtin_riscv_orc_b_32:
21637 case RISCV::BI__builtin_riscv_orc_b_64:
21638 case RISCV::BI__builtin_riscv_clz_32:
21639 case RISCV::BI__builtin_riscv_clz_64:
21640 case RISCV::BI__builtin_riscv_ctz_32:
21641 case RISCV::BI__builtin_riscv_ctz_64:
21642 case RISCV::BI__builtin_riscv_clmul_32:
21643 case RISCV::BI__builtin_riscv_clmul_64:
21644 case RISCV::BI__builtin_riscv_clmulh_32:
21645 case RISCV::BI__builtin_riscv_clmulh_64:
21646 case RISCV::BI__builtin_riscv_clmulr_32:
21647 case RISCV::BI__builtin_riscv_clmulr_64:
21648 case RISCV::BI__builtin_riscv_xperm4_32:
21649 case RISCV::BI__builtin_riscv_xperm4_64:
21650 case RISCV::BI__builtin_riscv_xperm8_32:
21651 case RISCV::BI__builtin_riscv_xperm8_64:
21652 case RISCV::BI__builtin_riscv_brev8_32:
21653 case RISCV::BI__builtin_riscv_brev8_64:
21654 case RISCV::BI__builtin_riscv_zip_32:
21655 case RISCV::BI__builtin_riscv_unzip_32: {
21656 switch (BuiltinID) {
21657 default: llvm_unreachable("unexpected builtin ID");
21658 // Zbb
21659 case RISCV::BI__builtin_riscv_orc_b_32:
21660 case RISCV::BI__builtin_riscv_orc_b_64:
21661 ID = Intrinsic::riscv_orc_b;
21662 break;
21663 case RISCV::BI__builtin_riscv_clz_32:
21664 case RISCV::BI__builtin_riscv_clz_64: {
21665 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
21666 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21667 if (Result->getType() != ResultType)
21668 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21669 "cast");
21670 return Result;
21671 }
21672 case RISCV::BI__builtin_riscv_ctz_32:
21673 case RISCV::BI__builtin_riscv_ctz_64: {
21674 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
21675 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21676 if (Result->getType() != ResultType)
21677 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21678 "cast");
21679 return Result;
21680 }
21681
21682 // Zbc
21683 case RISCV::BI__builtin_riscv_clmul_32:
21684 case RISCV::BI__builtin_riscv_clmul_64:
21685 ID = Intrinsic::riscv_clmul;
21686 break;
21687 case RISCV::BI__builtin_riscv_clmulh_32:
21688 case RISCV::BI__builtin_riscv_clmulh_64:
21689 ID = Intrinsic::riscv_clmulh;
21690 break;
21691 case RISCV::BI__builtin_riscv_clmulr_32:
21692 case RISCV::BI__builtin_riscv_clmulr_64:
21693 ID = Intrinsic::riscv_clmulr;
21694 break;
21695
21696 // Zbkx
21697 case RISCV::BI__builtin_riscv_xperm8_32:
21698 case RISCV::BI__builtin_riscv_xperm8_64:
21699 ID = Intrinsic::riscv_xperm8;
21700 break;
21701 case RISCV::BI__builtin_riscv_xperm4_32:
21702 case RISCV::BI__builtin_riscv_xperm4_64:
21703 ID = Intrinsic::riscv_xperm4;
21704 break;
21705
21706 // Zbkb
21707 case RISCV::BI__builtin_riscv_brev8_32:
21708 case RISCV::BI__builtin_riscv_brev8_64:
21709 ID = Intrinsic::riscv_brev8;
21710 break;
21711 case RISCV::BI__builtin_riscv_zip_32:
21712 ID = Intrinsic::riscv_zip;
21713 break;
21714 case RISCV::BI__builtin_riscv_unzip_32:
21715 ID = Intrinsic::riscv_unzip;
21716 break;
21717 }
21718
21719 IntrinsicTypes = {ResultType};
21720 break;
21721 }
21722
21723 // Zk builtins
21724
21725 // Zknh
21726 case RISCV::BI__builtin_riscv_sha256sig0:
21727 ID = Intrinsic::riscv_sha256sig0;
21728 break;
21729 case RISCV::BI__builtin_riscv_sha256sig1:
21730 ID = Intrinsic::riscv_sha256sig1;
21731 break;
21732 case RISCV::BI__builtin_riscv_sha256sum0:
21733 ID = Intrinsic::riscv_sha256sum0;
21734 break;
21735 case RISCV::BI__builtin_riscv_sha256sum1:
21736 ID = Intrinsic::riscv_sha256sum1;
21737 break;
21738
21739 // Zksed
21740 case RISCV::BI__builtin_riscv_sm4ks:
21741 ID = Intrinsic::riscv_sm4ks;
21742 break;
21743 case RISCV::BI__builtin_riscv_sm4ed:
21744 ID = Intrinsic::riscv_sm4ed;
21745 break;
21746
21747 // Zksh
21748 case RISCV::BI__builtin_riscv_sm3p0:
21749 ID = Intrinsic::riscv_sm3p0;
21750 break;
21751 case RISCV::BI__builtin_riscv_sm3p1:
21752 ID = Intrinsic::riscv_sm3p1;
21753 break;
21754
21755 // Zihintntl
21756 case RISCV::BI__builtin_riscv_ntl_load: {
21757 llvm::Type *ResTy = ConvertType(E->getType());
21758 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21759 if (Ops.size() == 2)
21760 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
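// Editorial illustration (not part of the original source): e.g.
//   int x = __builtin_riscv_ntl_load(p, __RISCV_NTLH_ALL);
// emits a load tagged with !nontemporal metadata plus a
// "riscv-nontemporal-domain" node carrying the domain value.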
21761
21762 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21763 getLLVMContext(),
21764 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21765 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21766 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21767
21768 int Width;
21769 if (ResTy->isScalableTy()) {
21770 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
21771 llvm::Type *ScalarTy = ResTy->getScalarType();
21772 Width = ScalarTy->getPrimitiveSizeInBits() *
21773 SVTy->getElementCount().getKnownMinValue();
21774 } else
21775 Width = ResTy->getPrimitiveSizeInBits();
21776 LoadInst *Load = Builder.CreateLoad(
21777 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
21778
21779 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21780 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21781 RISCVDomainNode);
21782
21783 return Load;
21784 }
21785 case RISCV::BI__builtin_riscv_ntl_store: {
21786 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21787 if (Ops.size() == 3)
21788 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
21789
21790 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21791 getLLVMContext(),
21792 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21793 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21794 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21795
21796 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
21797 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21798 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21799 RISCVDomainNode);
21800
21801 return Store;
21802 }
21803
21804 // Vector builtins are handled from here.
21805#include "clang/Basic/riscv_vector_builtin_cg.inc"
21806 // SiFive Vector builtins are handled from here.
21807#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
21808 }
21809
21810 assert(ID != Intrinsic::not_intrinsic);
21811
21812 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
21813 return Builder.CreateCall(F, Ops, "");
21814}
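The carry-less multiply and Zbkx/Zbkb cases above only select an overloaded llvm.riscv.* intrinsic and record the result type as the single overload type (IntrinsicTypes = {ResultType}); the Zknh/Zksed/Zksh cases that follow set only the intrinsic ID, so they are emitted without overload types. A minimal caller-side sketch of the first group, assuming an rv32 target built with the Zbkb extension and the usual unsigned-int prototype for the _32 builtin (both are assumptions, not taken from this file):

#include <cstdint>

// Hedged sketch: this call is expected to reach the
// RISCV::BI__builtin_riscv_brev8_32 case above and lower to the
// llvm.riscv.brev8 intrinsic overloaded on i32.
std::uint32_t reverse_bits_within_bytes(std::uint32_t X) {
  return __builtin_riscv_brev8_32(X);
}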
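The Zihintntl cases emit an ordinary load or store and then attach two metadata nodes: !nontemporal and a "riscv-nontemporal-domain" node carrying the domain value, which defaults to 5 (__RISCV_NTLH_ALL) when the optional trailing argument is omitted. A usage sketch, assuming the pointer/value argument shapes implied by the Ops.size() checks above (the function name here is illustrative):

#include <cstdint>

// Hedged sketch: both builtins below should hit the Zihintntl cases above;
// the resulting load and store each carry !nontemporal metadata plus the
// riscv-nontemporal-domain node with the default domain of 5.
void copy_one_nontemporal(std::int32_t *Dst, std::int32_t *Src) {
  std::int32_t V = __builtin_riscv_ntl_load(Src);
  __builtin_riscv_ntl_store(Dst, V);
}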
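Every case that does not return directly falls through to the common tail: the chosen Intrinsic::ID and any overload types collected in IntrinsicTypes are turned into an intrinsic declaration, which is then called with the already-built Ops. A standalone sketch of that idiom using plain LLVM APIs (an illustration of the pattern, not the CodeGenFunction code itself):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Hedged sketch mirroring the CGM.getIntrinsic(ID, IntrinsicTypes) /
// Builder.CreateCall(F, Ops) tail above for an intrinsic overloaded on its
// result type.
static llvm::Value *emitOverloadedIntrinsic(llvm::Module &M,
                                            llvm::IRBuilder<> &Builder,
                                            llvm::Intrinsic::ID ID,
                                            llvm::ArrayRef<llvm::Value *> Ops,
                                            llvm::Type *ResultType) {
  llvm::Function *F = llvm::Intrinsic::getDeclaration(&M, ID, {ResultType});
  return Builder.CreateCall(F, Ops);
}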
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3284
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8427
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9263
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:211
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1275
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6453
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:390
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2141
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2107
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6322
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2598
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9233
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:802
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9226
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7468
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9453
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7480
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7450
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8495
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2476
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:443
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:735
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:493
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6449
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7481
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1473
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7485
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:862
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:630
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2504
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:467
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:770
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9222
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7482
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9300
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6211
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9737
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1858
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7292
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6446
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:595
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:510
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:263
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9327
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1704
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1558
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1336
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6458
@ UnsignedAlts
Definition: CGBuiltin.cpp:6416
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6421
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6425
@ Use64BitVectors
Definition: CGBuiltin.cpp:6418
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6413
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6423
@ InventFloatType
Definition: CGBuiltin.cpp:6415
@ AddRetType
Definition: CGBuiltin.cpp:6408
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6410
@ VectorizeRetType
Definition: CGBuiltin.cpp:6412
@ VectorRet
Definition: CGBuiltin.cpp:6422
@ Add1ArgType
Definition: CGBuiltin.cpp:6409
@ Use128BitVectors
Definition: CGBuiltin.cpp:6419
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:676
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:667
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2334
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:810
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1412
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9289
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:547
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:756
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2388
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9739
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2527
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6281
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:254
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9315
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:182
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8406
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:72
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8398
@ VolatileRead
Definition: CGBuiltin.cpp:8400
@ NormalRead
Definition: CGBuiltin.cpp:8399
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:348
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:193
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2376
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:308
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:527
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:171
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9255
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7477
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6777
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:235
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:720
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7543
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:618
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:606
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:648
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:479
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2609
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1285
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2342
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:585
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:566
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:705
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:246
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1321
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8325
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2101
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:454
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7479
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7052
CodeGenFunction::ComplexPairTy ComplexPairTy
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:143
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1118
IdentifierTable & Idents
Definition: ASTContext.h:644
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:646
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
QualType getObjCIdType() const
Represents the Objective-C id type.
Definition: ASTContext.h:2063
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2617
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2340
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1091
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:757
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2242
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2245
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3514
QualType getElementType() const
Definition: Type.h:3526
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:251
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:122
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2820
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:3011
bool hasStoredFPFeatures() const
Definition: Expr.h:2982
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Expr.cpp:1638
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition: Expr.h:2990
Expr * getCallee()
Definition: Expr.h:2970
FPOptionsOverride getFPFeatures() const
Definition: Expr.h:3102
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2998
arg_range arguments()
Definition: Expr.h:3059
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition: Expr.cpp:1590
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align. Beware: llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:111
static Address invalid()
Definition: Address.h:153
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:220
CharUnits getAlignment() const
Definition: Address.h:166
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:184
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:241
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:234
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:176
An aggregate value slot.
Definition: CGValue.h:512
Address getAddress() const
Definition: CGValue.h:652
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:864
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:881
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:388
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:203
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:380
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:404
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:397
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:436
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:364
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:345
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:62
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:129
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:258
void add(RValue rvalue, QualType type)
Definition: CGCall.h:282
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
llvm::CallBase * addControlledConvergenceToken(llvm::CallBase *Input)
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
llvm::Value * EmitCountedByFieldExpr(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E)
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
const FieldDecl * FindCountedByField(const FieldDecl *FD)
Find the FieldDecl specified in a FAM's "counted_by" attribute.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:97
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1632
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:668
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:352
LValue - This represents an lvalue reference.
Definition: CGValue.h:181
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:427
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:370
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:365
void setNontemporal(bool Value)
Definition: CGValue.h:322
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:361
bool hasInfo() const
Returns true if there is LoopInfo on the stack.
Definition: CGLoopInfo.h:299
const LoopInfo & getInfo() const
Return the LoopInfo for the current loop.
Definition: CGLoopInfo.h:302
Information used when generating a structured loop.
Definition: CGLoopInfo.h:90
llvm::BasicBlock * getHeader() const
Get the header block of this loop.
Definition: CGLoopInfo.h:101
const LoopInfo * getParent() const
Returns the first outer loop containing this loop if any, nullptr otherwise.
Definition: CGLoopInfo.h:115
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:41
static RValue getIgnored()
Definition: CGValue.h:92
static RValue get(llvm::Value *V)
Definition: CGValue.h:97
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:124
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:107
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:70
An abstract representation of an aligned address.
Definition: Address.h:41
llvm::Value * getPointer() const
Definition: Address.h:65
static RawAddress invalid()
Definition: Address.h:60
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:356
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:150
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:140
const T & getABIInfo() const
Definition: TargetInfo.h:56
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:122
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:159
Complex values, per C99 6.2.5p11.
Definition: Type.h:3082
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4163
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:1946
T * getAttr() const
Definition: DeclBase.h:579
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:599
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:227
DeclContext * getDeclContext()
Definition: DeclBase.h:454
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:413
bool hasAttr() const
Definition: DeclBase.h:583
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3064
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3059
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3055
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:825
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3556
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3039
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3918
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available where the pointer is pointing.
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
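The evaluation entry points above are typically layered; a rough sketch that assumes only the Expr methods listed here (foldToConstantInt is an invented helper name, not something defined in this file):

#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "llvm/ADT/APSInt.h"
#include <optional>

// Hypothetical helper: strip parentheses and implicit casts, try the strict
// integer-constant-expression path, then fall back to general folding.
static std::optional<llvm::APSInt>
foldToConstantInt(const clang::Expr *Arg, const clang::ASTContext &Ctx) {
  const clang::Expr *E = Arg->IgnoreParenImpCasts();
  if (std::optional<llvm::APSInt> ICE = E->getIntegerConstantExpr(Ctx))
    return ICE;
  clang::Expr::EvalResult Result;
  if (E->EvaluateAsInt(Result, Ctx))
    return Result.Val.getInt();
  return std::nullopt;
}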
Represents difference between two FPOptions values.
Definition: LangOptions.h:915
Represents a member of a struct/union/class.
Definition: Decl.h:3058
Represents a function declaration or definition.
Definition: Decl.h:1971
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2707
Represents a prototype with parameter type info, e.g. 'int foo(int)' or 'int foo(void)'.
Definition: Type.h:4652
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5381
StringRef getName() const
Get the name of the identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ constructor, Objective-C selector, etc).
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL 2.0 pipe type.
Definition: Type.h:7204
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3135
QualType getPointeeType() const
Definition: Type.h:3145
A (possibly-)qualified type.
Definition: Type.h:940
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:7439
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2836
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7481
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2832
The collection of all-type qualifiers we support.
Definition: Type.h:318
Represents a struct/union/class.
Definition: Decl.h:4169
field_range fields() const
Definition: Decl.h:4375
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
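These flag accessors drive how an overloaded SVE builtin is lowered; a hedged sketch of the general shape, using only the predicates listed above (the enum and function name are invented for illustration):

#include "clang/Basic/TargetBuiltins.h"

// Hypothetical classification of an SVE builtin's memory behavior.
enum class SVEMemOpKind { Load, Store, Prefetch, Other };

static SVEMemOpKind classifySVEMemoryOp(const clang::SVETypeFlags &Flags) {
  if (Flags.isGatherLoad() || Flags.isStructLoad())
    return SVEMemOpKind::Load;
  if (Flags.isStore() || Flags.isScatterStore() || Flags.isStructStore())
    return SVEMemOpKind::Store;
  if (Flags.isPrefetch() || Flags.isGatherPrefetch())
    return SVEMemOpKind::Prefetch;
  return SVEMemOpKind::Other;
}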
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
bool isUnion() const
Definition: Decl.h:3791
Exposes information about the current target.
Definition: TargetInfo.h:213
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:307
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:666
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1235
bool isLittleEndian() const
Definition: TargetInfo.h:1630
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:830
bool isBigEndian() const
Definition: TargetInfo.h:1629
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1636
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the target machine you are compiling for.
Definition: TargetInfo.h:702
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
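As an illustration of how a lowering routine might consult TargetInfo, a minimal sketch that picks an IR carrier type for half-precision values based on hasLegalHalfType (the helper name is hypothetical):

#include "clang/Basic/TargetInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

// Hypothetical helper: use IR 'half' only where the target supports it,
// otherwise widen to 'float'.
static llvm::Type *getHalfCarrierType(const clang::TargetInfo &Target,
                                      llvm::LLVMContext &Ctx) {
  return Target.hasLegalHalfType() ? llvm::Type::getHalfTy(Ctx)
                                   : llvm::Type::getFloatTy(Ctx);
}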
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1813
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template or class template partial specialization.
Definition: Type.cpp:1870
bool isBlockPointerType() const
Definition: Type.h:7616
bool isVoidType() const
Definition: Type.h:7901
bool isBooleanType() const
Definition: Type.h:8029
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:2134
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:666
bool isArrayType() const
Definition: Type.h:7674
bool isCountAttributedType() const
Definition: Type.cpp:683
bool isPointerType() const
Definition: Type.h:7608
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7941
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8186
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:694
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8016
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e.g., it is an unsigned integer type or a vector.
Definition: Type.cpp:2224
bool isBitIntType() const
Definition: Type.h:7836
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector.
Definition: Type.cpp:2174
bool isObjCObjectPointerType() const
Definition: Type.h:7740
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g., it is a floating-point type or a vector.
Definition: Type.cpp:2246
bool isFloatingType() const
Definition: Type.cpp:2237
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true for _Bool], or an enum decl which has an unsigned representation.
Definition: Type.cpp:2184
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8119
bool isRecordType() const
Definition: Type.h:7702
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1874
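A builtin emitter commonly dispatches on these predicates; a sketch under the assumption that only the Type/QualType queries listed above are available (the enum and helper name are invented):

#include "clang/AST/Type.h"

// Hypothetical coarse classification of an argument's type.
enum class ArgKind { Pointer, Integer, Float, Record, Other };

static ArgKind classifyArgType(clang::QualType T) {
  if (T->isPointerType() || T->isObjCObjectPointerType() ||
      T->isBlockPointerType())
    return ArgKind::Pointer;
  if (T->isIntegralOrEnumerationType() || T->isBitIntType())
    return ArgKind::Integer;
  if (T->isFloatingType())
    return ArgKind::Float;
  if (T->isRecordType())
    return ArgKind::Record;
  return ArgKind::Other;
}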
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:3965
unsigned getNumElements() const
Definition: Type.h:3980
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw statement leaving a full-expression or a try-block running out of normal control flow).
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
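A minimal sketch of how this entry point is used together with the OSLogBufferLayout accessors listed earlier (getOSLogHeaderBytes is an invented name):

#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"

// Hypothetical helper: compute the buffer layout for an os_log format call
// and return the two header bytes that precede the formatted items.
static bool getOSLogHeaderBytes(clang::ASTContext &Ctx,
                                const clang::CallExpr *E,
                                unsigned char &Summary,
                                unsigned char &NumArgs) {
  clang::analyze_os_log::OSLogBufferLayout Layout;
  if (!clang::analyze_os_log::computeOSLogBufferLayout(Ctx, E, Layout))
    return false;
  Summary = Layout.getSummaryByte();
  NumArgs = Layout.getNumArgsByte();
  return true;
}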
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:923
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:217
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1867
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:348
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:489
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1385
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1707
The JSON file list parser is used to communicate input to InstallAPI.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:151
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
unsigned long uint64_t
long int64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:21
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
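Illustrative use of the three SanitizerSet operations above, working on a local copy (the helper and the specific sanitizer kinds chosen are for demonstration only):

#include "clang/Basic/Sanitizers.h"

// Hypothetical helper: query two individual checks, then flip and clear
// bits in the local copy to show set() and clear().
static bool hasAlignmentOrNullCheck(clang::SanitizerSet SanOpts) {
  bool Wanted = SanOpts.has(clang::SanitizerKind::Alignment) ||
                SanOpts.has(clang::SanitizerKind::Null);
  SanOpts.set(clang::SanitizerKind::Null, false); // disable one sanitizer
  SanOpts.clear();                                // disable all of them
  return Wanted;
}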
uint64_t Width
Definition: ASTContext.h:153
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
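The XRayInstrMask constants and XRayInstrSet::has combine into simple membership tests; a hedged sketch (the predicate name is invented):

#include "clang/Basic/XRayInstr.h"

// Hypothetical predicate: should custom/typed XRay event builtins be
// lowered, given the active instrumentation bundle?
static bool wantsXRayEvents(const clang::XRayInstrSet &Bundle) {
  return Bundle.has(clang::XRayInstrKind::Custom) ||
         Bundle.has(clang::XRayInstrKind::Typed);
}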
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742